diff --git a/.mailmap b/.mailmap index e51d76df75c2deede27219df6fd0d1a30065fe49..8ee01d9d70464bc0012d1bf87ca8fcef7ccd7545 100644 --- a/.mailmap +++ b/.mailmap @@ -166,6 +166,7 @@ Daniel Borkmann Daniel Borkmann David Brownell David Collins +David Heidelberg David Rheinsberg David Rheinsberg David Rheinsberg diff --git a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu index 307a55f599cb1716f229fd95651aadcd30ad24c3..35a8f6dae5bfdd35fb0e61131d9f63d97d8a4c50 100644 --- a/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu +++ b/Documentation/ABI/testing/sysfs-bus-i2c-devices-turris-omnia-mcu @@ -32,9 +32,9 @@ Description: (RW) The front button on the Turris Omnia router can be interrupt. This file switches between these two modes: - - "mcu" makes the button press event be handled by the MCU to - change the LEDs panel intensity. - - "cpu" makes the button press event be handled by the CPU. + - ``mcu`` makes the button press event be handled by the MCU to + change the LEDs panel intensity. + - ``cpu`` makes the button press event be handled by the CPU. Format: %s. diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index 325873385b71fbf7830c917087f6fa1ff75044ec..de725ca3be825ba0e289afbf6d44d97f3760a477 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -562,7 +562,8 @@ Description: Control Symmetric Multi Threading (SMT) ================ ========================================= If control status is "forceoff" or "notsupported" writes - are rejected. + are rejected. Note that enabling SMT on PowerPC skips + offline cores. What: /sys/devices/system/cpu/cpuX/power/energy_perf_bias Date: March 2019 diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index fd4b56c0996f47d6f11c93db442eabca3f252264..c09674a75a9e3c712339b3dd3b830397dadf4c84 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -742,7 +742,7 @@ SecurityFlags Flags which control security negotiation and may use NTLMSSP 0x00080 must use NTLMSSP 0x80080 seal (packet encryption) 0x00040 - must seal (not implemented yet) 0x40040 + must seal 0x40040 cifsFYI If set to non-zero value, additional debug information will be logged to the system error log. This field diff --git a/Documentation/admin-guide/device-mapper/dm-crypt.rst b/Documentation/admin-guide/device-mapper/dm-crypt.rst index e625830d335ea70c7692ec7005e524900e729f36..552c9155165d7c89920f5bbd8d38a02e01386822 100644 --- a/Documentation/admin-guide/device-mapper/dm-crypt.rst +++ b/Documentation/admin-guide/device-mapper/dm-crypt.rst @@ -162,13 +162,14 @@ iv_large_sectors Module parameters:: -max_read_size -max_write_size - Maximum size of read or write requests. When a request larger than this size - is received, dm-crypt will split the request. The splitting improves - concurrency (the split requests could be encrypted in parallel by multiple - cores), but it also causes overhead. The user should tune these parameters to - fit the actual workload. + + max_read_size + max_write_size + Maximum size of read or write requests. When a request larger than this size + is received, dm-crypt will split the request. The splitting improves + concurrency (the split requests could be encrypted in parallel by multiple + cores), but it also causes overhead. The user should tune these parameters to + fit the actual workload. Example scripts diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f1384c7b59c9282fc6fb40569a92f6c13861bbb9..09126bb8cc9ffb140280c56895754a0e2c602f25 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4798,11 +4798,9 @@ profile= [KNL] Enable kernel profiling via /proc/profile Format: [,] - Param: : "schedule", "sleep", or "kvm" + Param: : "schedule" or "kvm" [defaults to kernel profiling] Param: "schedule" - profile schedule points. - Param: "sleep" - profile D-state sleeping (millisecs). - Requires CONFIG_SCHEDSTATS Param: "kvm" - profile VM exits. Param: - step/bucket size as a power of 2 for statistical time based profiling. diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst index bb83c5d8c6755149d5317c7eeba5f3ced9dc6014..50327c05be8d1b77fc7af27c695dbb97740cbca1 100644 --- a/Documentation/arch/arm64/silicon-errata.rst +++ b/Documentation/arch/arm64/silicon-errata.rst @@ -122,10 +122,18 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A76 | #1490853 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A76 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1491015 | N/A | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A77 | #1508412 | ARM64_ERRATUM_1508412 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A77 | #3324348 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A78C | #3324346,3324347| ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A710 | #2054223 | ARM64_ERRATUM_2054223 | @@ -138,8 +146,14 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A720 | #3456091 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A725 | #3456106 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X1 | #1502854 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1 | #3324344 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-X1C | #3324346 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2119858 | ARM64_ERRATUM_2119858 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-X2 | #2224489 | ARM64_ERRATUM_2224489 | @@ -160,6 +174,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #3324349 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2139208 | ARM64_ERRATUM_2139208 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N2 | #2067961 | ARM64_ERRATUM_2067961 | @@ -170,6 +186,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V1 | #1619801 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-V1 | #3324341 | ARM64_ERRATUM_3194386 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V2 | #3324336 | ARM64_ERRATUM_3194386 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-V3 | #3312417 | ARM64_ERRATUM_3194386 | diff --git a/Documentation/arch/riscv/hwprobe.rst b/Documentation/arch/riscv/hwprobe.rst index 3db60a0911df651abcb4deeee58d686d2d9f6ea5..85b709257918ade7590ebbf2f0f53dcdf8ed36be 100644 --- a/Documentation/arch/riscv/hwprobe.rst +++ b/Documentation/arch/riscv/hwprobe.rst @@ -239,25 +239,33 @@ The following keys are defined: ratified in commit 98918c844281 ("Merge pull request #1217 from riscv/zawrs") of riscv-isa-manual. -* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance - information about the selected set of processors. +* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: Deprecated. Returns similar values to + :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`, but the key was + mistakenly classified as a bitmask rather than a value. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned - accesses is unknown. +* :c:macro:`RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF`: An enum value describing + the performance of misaligned scalar native word accesses on the selected set + of processors. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are - emulated via software, either in or below the kernel. These accesses are - always extremely slow. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN`: The performance of + misaligned scalar accesses is unknown. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are slower - than equivalent byte accesses. Misaligned accesses may be supported - directly in hardware, or trapped and emulated by software. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED`: Misaligned scalar + accesses are emulated via software, either in or below the kernel. These + accesses are always extremely slow. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are faster - than equivalent byte accesses. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW`: Misaligned scalar native + word sized accesses are slower than the equivalent quantity of byte + accesses. Misaligned accesses may be supported directly in hardware, or + trapped and emulated by software. - * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are - not supported at all and will generate a misaligned address fault. + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_FAST`: Misaligned scalar native + word sized accesses are faster than the equivalent quantity of byte + accesses. + + * :c:macro:`RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED`: Misaligned scalar + accesses are not supported at all and will generate a misaligned address + fault. * :c:macro:`RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE`: An unsigned int which represents the size of the Zicboz block in bytes. diff --git a/Documentation/core-api/workqueue.rst b/Documentation/core-api/workqueue.rst index bcc370c876be9538c5020c6a14e8584d9090ebdf..16f861c9791e4ae2914bbf517c1a1835263711bf 100644 --- a/Documentation/core-api/workqueue.rst +++ b/Documentation/core-api/workqueue.rst @@ -260,7 +260,7 @@ Some users depend on strict execution ordering where only one work item is in flight at any given time and the work items are processed in queueing order. While the combination of ``@max_active`` of 1 and ``WQ_UNBOUND`` used to achieve this behavior, this is no longer the -case. Use ``alloc_ordered_queue()`` instead. +case. Use alloc_ordered_workqueue() instead. Example Execution Scenarios diff --git a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml index b5e5767d86988d3a9b44501e960fd484509a3e82..13eaa8d9a16e5a4bd43b3e184f9277494acf27a1 100644 --- a/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml +++ b/Documentation/devicetree/bindings/ata/rockchip,dwc-ahci.yaml @@ -35,6 +35,9 @@ properties: ports-implemented: const: 1 + power-domains: + maxItems: 1 + sata-port@0: $ref: /schemas/ata/snps,dwc-ahci-common.yaml#/$defs/dwc-ahci-port diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml index a584b4953e686d8ad8fdea7813aa03bcd4505978..46403b98411f81d6d136d8c9e5602095b72e0e33 100644 --- a/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml index 6b9c1d198b14d04c8999c0f6abb4ac36dae0e5d4..10afe984e2fbc1fd190b6cb901b0f1f5df43157f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-msm8994.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on MSM8994 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml index a5a29dc75ae11b99681708a6cee6bca9c32bba4c..1fe68e07a2b208b7caffe181328ddc26e09cd20e 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6125.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml index 2280b859b2ad6392e7d4912bf76731b665aa58e4..78e232fa95dc603b58848b45ffd2fc79b32577c0 100644 --- a/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,gcc-sm6350.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml index cf19f44af77440920371975d36ae8a3f427be928..4ff17a91344badaf572b15f657f0c740ea1859b7 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6115-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6115 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml index 374a1844a159a0e53752ef351bd2d5aaaf18bf05..10a9c96a97b6a1390736f701488311670017d911 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6125-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6125 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks and power domains on diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml index fd6658cb793dbb32526f7208f29a7cde9fa027b2..c03b30f64f359abd03e61a543522c81ce542e274 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6350-camcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Camera Clock & Reset Controller on SM6350 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm camera clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml index 183b1c75dbdf3b1401f00f050158787c68669712..3cd422a645fd876b9f834a6f8389160f5aab4bf2 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-dispcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Display Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm display clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml index 147b75a21508f1e6431c816b4f1d0827dc612a0b..de4e9066eeb835fd8215c40fed58c4b2baa28504 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gcc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Global Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm global clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml index cf4cad76f6c9517ac244d72297c90cd747a8b24e..d9dd479c17bd63956a43055e007c3a612ef5b569 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm6375-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM6375 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml index 46d1d91e3a01ef2d106f3b4986b5f9a4fb12d2c7..5c2ecec0624e35bde493ee7b39dbe8afbf295e2f 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8350-videocc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM8350 Video Clock & Reset Controller maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm video clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml index 3c2cac14e6c3a0ae676ad160e363a90893d5678e..d10bb002906e9f55f1f1dedc570f5e835181a504 100644 --- a/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml +++ b/Documentation/devicetree/bindings/clock/qcom,sm8450-gpucc.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Graphics Clock & Reset Controller on SM8450 maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm graphics clock control module provides the clocks, resets and power diff --git a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml index 8e8a288d318c34d8247abe0705f81c175fb15d99..e22b4c433fd07c2d92faefbdcaa5719c021e9041 100644 --- a/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml +++ b/Documentation/devicetree/bindings/display/msm/qcom,sm6375-mdss.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm SM6375 Display MDSS maintainers: - - Konrad Dybcio + - Konrad Dybcio description: SM6375 MSM Mobile Display Subsystem (MDSS), which encapsulates sub-blocks diff --git a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml index 2399cabf044c2fdde7b17fea546b69d095559d69..dd614e077bbff3d2d52dddc5657c0bc40d8e49a4 100644 --- a/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml +++ b/Documentation/devicetree/bindings/display/panel/asus,z00t-tm5p5-nt35596.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: ASUS Z00T TM5P5 NT35596 5.5" 1080×1920 LCD Panel maintainers: - - Konrad Dybcio + - Konrad Dybcio description: |+ This panel seems to only be found in the Asus Z00T diff --git a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml index 5192c93fbd67a0524a77ce78af65b40d244f6c0c..032f783eefc4508df35da10e53ca20ff8b1b9bdf 100644 --- a/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml +++ b/Documentation/devicetree/bindings/display/panel/samsung,atna33xc20.yaml @@ -17,9 +17,12 @@ properties: oneOf: # Samsung 13.3" FHD (1920x1080 pixels) eDP AMOLED panel - const: samsung,atna33xc20 - # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel - items: - - const: samsung,atna45af01 + - enum: + # Samsung 14.5" WQXGA+ (2880x1800 pixels) eDP AMOLED panel + - samsung,atna45af01 + # Samsung 14.5" 3K (2944x1840 pixels) eDP AMOLED panel + - samsung,atna45dc02 - const: samsung,atna33xc20 enable-gpios: true diff --git a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml index 191b692125e145748003a800ee663e0c09d9e3cc..032a989184ff057cb28116ef2e2fcf001cfa2f40 100644 --- a/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml +++ b/Documentation/devicetree/bindings/display/panel/sony,td4353-jdi.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Sony TD4353 JDI 5 / 5.7" 2160x1080 MIPI-DSI Panel maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | The Sony TD4353 JDI is a 5 (XZ2c) / 5.7 (XZ2) inch 2160x1080 diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml index 1715b0c9feeafe9153d30a814135385201446767..c31e5e719525015ad4f06a3f7164a0b1e874901c 100644 --- a/Documentation/devicetree/bindings/eeprom/at25.yaml +++ b/Documentation/devicetree/bindings/eeprom/at25.yaml @@ -28,6 +28,7 @@ properties: - anvo,anv32e61w - atmel,at25256B - fujitsu,mb85rs1mt + - fujitsu,mb85rs256 - fujitsu,mb85rs64 - microchip,at25160bn - microchip,25lc040 diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml index 379721027bf84b927fb63c459aa142c10216a44a..51d48d4130d380d053f85f97f6a0428e483891b3 100644 --- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml +++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml @@ -42,6 +42,7 @@ properties: - focaltech,ft5426 - focaltech,ft5452 - focaltech,ft6236 + - focaltech,ft8201 - focaltech,ft8719 reg: diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml index 9fce7203bd42dbb57f0bf5d380b70d463ab1aaf6..78210791496f84c49989d6cb99a328ad997025b1 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc7280-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC7280 maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml index 6c2da03f0cd2241bce21c62d5517ab80c4793f14..100c686369092687e78f7c039135d63b5a02343b 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sc8280xp-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SC8280XP maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml index 3cff7e66225538ff593fe31202f4561631a6b850..300640a533dd67e59aa9d16f06500c152b1510ef 100644 --- a/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml +++ b/Documentation/devicetree/bindings/interconnect/qcom,sm8450-rpmh.yaml @@ -8,7 +8,7 @@ title: Qualcomm RPMh Network-On-Chip Interconnect on SM8450 maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio description: | RPMh interconnect providers support system bandwidth requirements through diff --git a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml index 571e5746d1776f5b2ce0c0a90632a627fb4e75ab..f8cebc9e8cd9d46b449cd297153dbebe5c84bf3f 100644 --- a/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml +++ b/Documentation/devicetree/bindings/iommu/qcom,iommu.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies legacy IOMMU implementations maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | Qualcomm "B" family devices which are not compatible with arm-smmu have diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml index a1b71b35319e70f0b1a12a4c84fa1d95c7b08d76..42f9843d1868ac1177589e396527847e4b04e567 100644 --- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml +++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml @@ -38,6 +38,10 @@ properties: managed: true + phys: + description: A reference to the SerDes lane(s) + maxItems: 1 + required: - reg diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml index bd3cbb44c99a4638e9b9aa57c95c8b3def7171e6..e75393b3d196c58f13c7c2f0eb47ac5ab69e250a 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,mdm9607-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. MDM9607 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm MDM9607 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml index a4771f87d93645549d9cfbb8b39315065d0b8cf6..b262af6be97da3207e2711a1367e400b2a25adae 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6350-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6350 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm SM6350 SoC. diff --git a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml index 047f82863f9bbfdfcd870a35656d0b56e6c018ba..c11af09c3f5b89e19e82d6f77cd72a69dc3a6afd 100644 --- a/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml +++ b/Documentation/devicetree/bindings/pinctrl/qcom,sm6375-tlmm.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. SM6375 TLMM block maintainers: - - Konrad Dybcio + - Konrad Dybcio description: Top Level Mode Multiplexer pin controller in Qualcomm SM6375 SoC. diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml index 7afafde17a38bfcb41abfb960841b054abf9faa7..61cf4fe19ca53ebe33397ccbdcf29b936476eef0 100644 --- a/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml +++ b/Documentation/devicetree/bindings/remoteproc/qcom,rpm-proc.yaml @@ -8,7 +8,7 @@ title: Qualcomm Resource Power Manager (RPM) Processor/Subsystem maintainers: - Bjorn Andersson - - Konrad Dybcio + - Konrad Dybcio - Stephan Gerhold description: | diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml index 9410404f87f1afb864c79614ada2659745a37ae9..ad2dcc39a5f5480f31057275d71c76e5ab864890 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,rpm-master-stats.yaml @@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: Qualcomm Technologies, Inc. (QTI) RPM Master Stats maintainers: - - Konrad Dybcio + - Konrad Dybcio description: | The Qualcomm RPM (Resource Power Manager) architecture includes a concept diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml index beb0ff0245b0ab84477b282243d9ce43c633c078..a65b1d1d5fdd9c50de429998f0d23c2fbbffa369 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml @@ -199,10 +199,11 @@ additionalProperties: false examples: - | + #include codec@1,0{ compatible = "slim217,250"; reg = <1 0>; - reset-gpios = <&tlmm 64 0>; + reset-gpios = <&tlmm 64 GPIO_ACTIVE_LOW>; slim-ifc-dev = <&wcd9340_ifd>; #sound-dai-cells = <1>; interrupt-parent = <&tlmm>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml index de397d879acc2e6e95e740cd4a2139bc88ff4c98..f94203798f24b36e33ff18dd41056e682d034a6e 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd937x.yaml @@ -42,7 +42,7 @@ examples: pinctrl-names = "default", "sleep"; pinctrl-0 = <&wcd_reset_n>; pinctrl-1 = <&wcd_reset_n_sleep>; - reset-gpios = <&tlmm 83 GPIO_ACTIVE_HIGH>; + reset-gpios = <&tlmm 83 GPIO_ACTIVE_LOW>; vdd-buck-supply = <&vreg_l17b_1p8>; vdd-rxtx-supply = <&vreg_l18b_1p8>; vdd-px-supply = <&vreg_l18b_1p8>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml index cf6c3787adfeff846a13391775fa188c15d17e33..10531350c3362788d1943100808ff9cf7d7036cc 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml @@ -34,9 +34,10 @@ unevaluatedProperties: false examples: - | + #include codec { compatible = "qcom,wcd9380-codec"; - reset-gpios = <&tlmm 32 0>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd938x_tx>; qcom,rx-device = <&wcd938x_rx>; diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml index 6e76f6a8634f0761c67e2bd3f57268dcb347e66e..c69291f4d575ca5b4fc037ac4d59c91f0df74cd1 100644 --- a/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml +++ b/Documentation/devicetree/bindings/sound/qcom,wcd939x.yaml @@ -52,10 +52,10 @@ unevaluatedProperties: false examples: - | - #include + #include codec { compatible = "qcom,wcd9390-codec"; - reset-gpios = <&tlmm 32 IRQ_TYPE_NONE>; + reset-gpios = <&tlmm 32 GPIO_ACTIVE_LOW>; #sound-dai-cells = <1>; qcom,tx-device = <&wcd939x_tx>; qcom,rx-device = <&wcd939x_rx>; diff --git a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml index 783c27591e564c7b8020d0be5c603f557661cda7..245e8c3ce6699342fd610c15df5806f2e71a4c13 100644 --- a/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml +++ b/Documentation/devicetree/bindings/usb/microchip,usb2514.yaml @@ -18,6 +18,7 @@ properties: - usb424,2412 - usb424,2417 - usb424,2514 + - usb424,2517 reg: true diff --git a/Documentation/driver-api/thermal/sysfs-api.rst b/Documentation/driver-api/thermal/sysfs-api.rst index 6c1175c6afba99ce914325fb87cd71f56f643e07..978198f8a18bed3e1aa19d8f4009b524583feed4 100644 --- a/Documentation/driver-api/thermal/sysfs-api.rst +++ b/Documentation/driver-api/thermal/sysfs-api.rst @@ -4,8 +4,6 @@ Generic Thermal Sysfs driver How To Written by Sujith Thomas , Zhang Rui -Updated: 2 January 2008 - Copyright (c) 2008 Intel Corporation @@ -38,23 +36,23 @@ temperature) and throttle appropriate devices. :: - struct thermal_zone_device - *thermal_zone_device_register(char *type, - int trips, int mask, void *devdata, - struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, - int passive_delay, int polling_delay)) + struct thermal_zone_device * + thermal_zone_device_register_with_trips(const char *type, + const struct thermal_trip *trips, + int num_trips, void *devdata, + const struct thermal_zone_device_ops *ops, + const struct thermal_zone_params *tzp, + unsigned int passive_delay, + unsigned int polling_delay) - This interface function adds a new thermal zone device (sensor) to + This interface function adds a new thermal zone device (sensor) to the /sys/class/thermal folder as `thermal_zone[0-*]`. It tries to bind all the - thermal cooling devices registered at the same time. + thermal cooling devices registered to it at the same time. type: the thermal zone type. trips: - the total number of trip points this thermal zone supports. - mask: - Bit string: If 'n'th bit is set, then trip point 'n' is writable. + the table of trip points for this thermal zone. devdata: device private data ops: @@ -67,32 +65,29 @@ temperature) and throttle appropriate devices. .get_temp: get the current temperature of the thermal zone. .set_trips: - set the trip points window. Whenever the current temperature - is updated, the trip points immediately below and above the - current temperature are found. - .get_mode: - get the current mode (enabled/disabled) of the thermal zone. - - - "enabled" means the kernel thermal management is - enabled. - - "disabled" will prevent kernel thermal driver action - upon trip points so that user applications can take - charge of thermal management. - .set_mode: - set the mode (enabled/disabled) of the thermal zone. - .get_trip_type: - get the type of certain trip point. - .get_trip_temp: - get the temperature above which the certain trip point - will be fired. + set the trip points window. Whenever the current temperature + is updated, the trip points immediately below and above the + current temperature are found. + .change_mode: + change the mode (enabled/disabled) of the thermal zone. + .set_trip_temp: + set the temperature of a given trip point. + .get_crit_temp: + get the critical temperature for this thermal zone. .set_emul_temp: - set the emulation temperature which helps in debugging - different threshold temperature points. + set the emulation temperature which helps in debugging + different threshold temperature points. + .get_trend: + get the trend of most recent zone temperature changes. + .hot: + hot trip point crossing handler. + .critical: + critical trip point crossing handler. tzp: thermal zone platform parameters. passive_delay: - number of milliseconds to wait between polls when - performing passive cooling. + number of milliseconds to wait between polls when performing passive + cooling. polling_delay: number of milliseconds to wait between polls when checking whether trip points have been crossed (0 for interrupt driven systems). diff --git a/Documentation/filesystems/caching/fscache.rst b/Documentation/filesystems/caching/fscache.rst index a74d7b052dc1305a8adbcd374438327605e92fb8..de1f32526cc1c4c9c65af2bde0da200ec42c4d98 100644 --- a/Documentation/filesystems/caching/fscache.rst +++ b/Documentation/filesystems/caching/fscache.rst @@ -318,10 +318,10 @@ where the columns are: Debugging ========= -If CONFIG_FSCACHE_DEBUG is enabled, the FS-Cache facility can have runtime -debugging enabled by adjusting the value in:: +If CONFIG_NETFS_DEBUG is enabled, the FS-Cache facility and NETFS support can +have runtime debugging enabled by adjusting the value in:: - /sys/module/fscache/parameters/debug + /sys/module/netfs/parameters/debug This is a bitmask of debugging streams to enable: @@ -343,6 +343,6 @@ This is a bitmask of debugging streams to enable: The appropriate set of values should be OR'd together and the result written to the control file. For example:: - echo $((1|8|512)) >/sys/module/fscache/parameters/debug + echo $((1|8|512)) >/sys/module/netfs/parameters/debug will turn on all function entry debugging. diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst index cc4626d6ee4f836b50dcb8b3e85de56d01da968f..c293f8e37468cddacafb5d3edc615134408d6b3b 100644 --- a/Documentation/filesystems/erofs.rst +++ b/Documentation/filesystems/erofs.rst @@ -75,7 +75,7 @@ Here are the main features of EROFS: - Support merging tail-end data into a special inode as fragments. - - Support large folios for uncompressed files. + - Support large folios to make use of THPs (Transparent Hugepages); - Support direct I/O on uncompressed files to avoid double caching for loop devices; diff --git a/Documentation/filesystems/smb/ksmbd.rst b/Documentation/filesystems/smb/ksmbd.rst index 6b30e43a0d11f49959d700e38fbfe115d71fd40c..67cb68ea6e6880db27cf4d192ec35cb3daf0bb1c 100644 --- a/Documentation/filesystems/smb/ksmbd.rst +++ b/Documentation/filesystems/smb/ksmbd.rst @@ -13,7 +13,7 @@ KSMBD architecture The subset of performance related operations belong in kernelspace and the other subset which belong to operations which are not really related with performance in userspace. So, DCE/RPC management that has historically resulted -into number of buffer overflow issues and dangerous security bugs and user +into a number of buffer overflow issues and dangerous security bugs and user account management are implemented in user space as ksmbd.mountd. File operations that are related with performance (open/read/write/close etc.) in kernel space (ksmbd). This also allows for easier integration with VFS @@ -24,8 +24,8 @@ ksmbd (kernel daemon) When the server daemon is started, It starts up a forker thread (ksmbd/interface name) at initialization time and open a dedicated port 445 -for listening to SMB requests. Whenever new clients make request, Forker -thread will accept the client connection and fork a new thread for dedicated +for listening to SMB requests. Whenever new clients make a request, the Forker +thread will accept the client connection and fork a new thread for a dedicated communication channel between the client and the server. It allows for parallel processing of SMB requests(commands) from clients as well as allowing for new clients to make new connections. Each instance is named ksmbd/1~n(port number) @@ -34,12 +34,12 @@ thread can decide to pass through the commands to the user space (ksmbd.mountd), currently DCE/RPC commands are identified to be handled through the user space. To further utilize the linux kernel, it has been chosen to process the commands as workitems and to be executed in the handlers of the ksmbd-io kworker threads. -It allows for multiplexing of the handlers as the kernel take care of initiating +It allows for multiplexing of the handlers as the kernel takes care of initiating extra worker threads if the load is increased and vice versa, if the load is -decreased it destroys the extra worker threads. So, after connection is -established with client. Dedicated ksmbd/1..n(port number) takes complete +decreased it destroys the extra worker threads. So, after the connection is +established with the client. Dedicated ksmbd/1..n(port number) takes complete ownership of receiving/parsing of SMB commands. Each received command is worked -in parallel i.e., There can be multiple clients commands which are worked in +in parallel i.e., there can be multiple client commands which are worked in parallel. After receiving each command a separated kernel workitem is prepared for each command which is further queued to be handled by ksmbd-io kworkers. So, each SMB workitem is queued to the kworkers. This allows the benefit of load @@ -49,9 +49,9 @@ performance by handling client commands in parallel. ksmbd.mountd (user space daemon) -------------------------------- -ksmbd.mountd is userspace process to, transfer user account and password that +ksmbd.mountd is a userspace process to, transfer the user account and password that are registered using ksmbd.adduser (part of utils for user space). Further it -allows sharing information parameters that parsed from smb.conf to ksmbd in +allows sharing information parameters that are parsed from smb.conf to ksmbd in kernel. For the execution part it has a daemon which is continuously running and connected to the kernel interface using netlink socket, it waits for the requests (dcerpc and share/user info). It handles RPC calls (at a minimum few @@ -124,7 +124,7 @@ How to run 1. Download ksmbd-tools(https://github.com/cifsd-team/ksmbd-tools/releases) and compile them. - - Refer README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) + - Refer to README(https://github.com/cifsd-team/ksmbd-tools/blob/master/README.md) to know how to use ksmbd.mountd/adduser/addshare/control utils $ ./autogen.sh @@ -133,7 +133,7 @@ How to run 2. Create /usr/local/etc/ksmbd/ksmbd.conf file, add SMB share in ksmbd.conf file. - - Refer ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage + - Refer to ksmbd.conf.example in ksmbd-utils, See ksmbd.conf manpage for details to configure shares. $ man ksmbd.conf @@ -145,7 +145,7 @@ How to run $ man ksmbd.adduser $ sudo ksmbd.adduser -a -4. Insert ksmbd.ko module after build your kernel. No need to load module +4. Insert the ksmbd.ko module after you build your kernel. No need to load the module if ksmbd is built into the kernel. - Set ksmbd in menuconfig(e.g. $ make menuconfig) @@ -175,7 +175,7 @@ Each layer 1. Enable all component prints # sudo ksmbd.control -d "all" -2. Enable one of components (smb, auth, vfs, oplock, ipc, conn, rdma) +2. Enable one of the components (smb, auth, vfs, oplock, ipc, conn, rdma) # sudo ksmbd.control -d "smb" 3. Show what prints are enabled. diff --git a/Documentation/kbuild/llvm.rst b/Documentation/kbuild/llvm.rst index bb5c44f8bd1c49cd88263c94200405479eea8325..6dc66b4f31a7bb62ba5bb6174730dc0a69ee0bba 100644 --- a/Documentation/kbuild/llvm.rst +++ b/Documentation/kbuild/llvm.rst @@ -126,7 +126,7 @@ Ccache ``ccache`` can be used with ``clang`` to improve subsequent builds, (though KBUILD_BUILD_TIMESTAMP_ should be set to a deterministic value between builds -in order to avoid 100% cache misses, see Reproducible_builds_ for more info): +in order to avoid 100% cache misses, see Reproducible_builds_ for more info):: KBUILD_BUILD_TIMESTAMP='' make LLVM=1 CC="ccache clang" diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 495e35fcfb218c8bc667011a43c2bd9b74229bbf..ea21fe135b97eb26f2d90967a31128799e92f2e7 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1753,6 +1753,7 @@ operations: request: attributes: - header + - context reply: attributes: - header @@ -1761,7 +1762,6 @@ operations: - indir - hkey - input_xfrm - dump: *rss-get-op - name: plca-get-cfg doc: Get PLCA params. diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 3ab423b80e913c2b7293727307777827c6e26449..d5f246aceb9f481a5ad0db84937aada30e1e81dd 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1875,6 +1875,7 @@ Kernel response contents: ===================================== ====== ========================== ``ETHTOOL_A_RSS_HEADER`` nested reply header + ``ETHTOOL_A_RSS_CONTEXT`` u32 context number ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes diff --git a/Documentation/process/embargoed-hardware-issues.rst b/Documentation/process/embargoed-hardware-issues.rst index 6e9a4597bf2cbbb9ddb3d9b727c07150efa37f25..daebce49cfdf5ff7d59ea1c23e00d2493d5238a0 100644 --- a/Documentation/process/embargoed-hardware-issues.rst +++ b/Documentation/process/embargoed-hardware-issues.rst @@ -13,9 +13,9 @@ kernel. Hardware issues like Meltdown, Spectre, L1TF etc. must be treated differently because they usually affect all Operating Systems ("OS") and therefore need coordination across different OS vendors, distributions, -hardware vendors and other parties. For some of the issues, software -mitigations can depend on microcode or firmware updates, which need further -coordination. +silicon vendors, hardware integrators, and other parties. For some of the +issues, software mitigations can depend on microcode or firmware updates, +which need further coordination. .. _Contact: @@ -32,8 +32,8 @@ Linux kernel security team (:ref:`Documentation/admin-guide/ `) instead. The team can be contacted by email at . This -is a private list of security officers who will help you to coordinate a -fix according to our documented process. +is a private list of security officers who will help you coordinate a fix +according to our documented process. The list is encrypted and email to the list can be sent by either PGP or S/MIME encrypted and must be signed with the reporter's PGP key or S/MIME @@ -43,7 +43,7 @@ the following URLs: - PGP: https://www.kernel.org/static/files/hardware-security.asc - S/MIME: https://www.kernel.org/static/files/hardware-security.crt -While hardware security issues are often handled by the affected hardware +While hardware security issues are often handled by the affected silicon vendor, we welcome contact from researchers or individuals who have identified a potential hardware flaw. @@ -65,7 +65,7 @@ of Linux Foundation's IT operations personnel technically have the ability to access the embargoed information, but are obliged to confidentiality by their employment contract. Linux Foundation IT personnel are also responsible for operating and managing the rest of -kernel.org infrastructure. +kernel.org's infrastructure. The Linux Foundation's current director of IT Project infrastructure is Konstantin Ryabitsev. @@ -85,7 +85,7 @@ Memorandum of Understanding The Linux kernel community has a deep understanding of the requirement to keep hardware security issues under embargo for coordination between -different OS vendors, distributors, hardware vendors and other parties. +different OS vendors, distributors, silicon vendors, and other parties. The Linux kernel community has successfully handled hardware security issues in the past and has the necessary mechanisms in place to allow @@ -103,11 +103,11 @@ the issue in the best technical way. All involved developers pledge to adhere to the embargo rules and to keep the received information confidential. Violation of the pledge will lead to immediate exclusion from the current issue and removal from all related -mailing-lists. In addition, the hardware security team will also exclude +mailing lists. In addition, the hardware security team will also exclude the offender from future issues. The impact of this consequence is a highly effective deterrent in our community. In case a violation happens the hardware security team will inform the involved parties immediately. If you -or anyone becomes aware of a potential violation, please report it +or anyone else becomes aware of a potential violation, please report it immediately to the Hardware security officers. @@ -124,14 +124,16 @@ method for these types of issues. Start of Disclosure """"""""""""""""""" -Disclosure starts by contacting the Linux kernel hardware security team by -email. This initial contact should contain a description of the problem and -a list of any known affected hardware. If your organization builds or -distributes the affected hardware, we encourage you to also consider what -other hardware could be affected. +Disclosure starts by emailing the Linux kernel hardware security team per +the Contact section above. This initial contact should contain a +description of the problem and a list of any known affected silicon. If +your organization builds or distributes the affected hardware, we encourage +you to also consider what other hardware could be affected. The disclosing +party is responsible for contacting the affected silicon vendors in a +timely manner. The hardware security team will provide an incident-specific encrypted -mailing-list which will be used for initial discussion with the reporter, +mailing list which will be used for initial discussion with the reporter, further disclosure, and coordination of fixes. The hardware security team will provide the disclosing party a list of @@ -158,8 +160,8 @@ This serves several purposes: - The disclosed entities can be contacted to name experts who should participate in the mitigation development. - - If an expert which is required to handle an issue is employed by an - listed entity or member of an listed entity, then the response teams can + - If an expert who is required to handle an issue is employed by a listed + entity or member of an listed entity, then the response teams can request the disclosure of that expert from that entity. This ensures that the expert is also part of the entity's response team. @@ -169,8 +171,8 @@ Disclosure The disclosing party provides detailed information to the initial response team via the specific encrypted mailing-list. -From our experience the technical documentation of these issues is usually -a sufficient starting point and further technical clarification is best +From our experience, the technical documentation of these issues is usually +a sufficient starting point, and further technical clarification is best done via email. Mitigation development @@ -179,57 +181,93 @@ Mitigation development The initial response team sets up an encrypted mailing-list or repurposes an existing one if appropriate. -Using a mailing-list is close to the normal Linux development process and -has been successfully used in developing mitigations for various hardware +Using a mailing list is close to the normal Linux development process and +has been successfully used to develop mitigations for various hardware security issues in the past. -The mailing-list operates in the same way as normal Linux development. -Patches are posted, discussed and reviewed and if agreed on applied to a -non-public git repository which is only accessible to the participating +The mailing list operates in the same way as normal Linux development. +Patches are posted, discussed, and reviewed and if agreed upon, applied to +a non-public git repository which is only accessible to the participating developers via a secure connection. The repository contains the main development branch against the mainline kernel and backport branches for stable kernel versions as necessary. The initial response team will identify further experts from the Linux -kernel developer community as needed. Bringing in experts can happen at any -time of the development process and needs to be handled in a timely manner. +kernel developer community as needed. Any involved party can suggest +further experts to be included, each of which will be subject to the same +requirements outlined above. -If an expert is employed by or member of an entity on the disclosure list +Bringing in experts can happen at any time in the development process and +needs to be handled in a timely manner. + +If an expert is employed by or a member of an entity on the disclosure list provided by the disclosing party, then participation will be requested from the relevant entity. -If not, then the disclosing party will be informed about the experts +If not, then the disclosing party will be informed about the experts' participation. The experts are covered by the Memorandum of Understanding -and the disclosing party is requested to acknowledge the participation. In -case that the disclosing party has a compelling reason to object, then this -objection has to be raised within five work days and resolved with the -incident team immediately. If the disclosing party does not react within -five work days this is taken as silent acknowledgement. +and the disclosing party is requested to acknowledge their participation. +In the case where the disclosing party has a compelling reason to object, +any objection must to be raised within five working days and resolved with +the incident team immediately. If the disclosing party does not react +within five working days this is taken as silent acknowledgment. -After acknowledgement or resolution of an objection the expert is disclosed -by the incident team and brought into the development process. +After the incident team acknowledges or resolves an objection, the expert +is disclosed and brought into the development process. List participants may not communicate about the issue outside of the private mailing list. List participants may not use any shared resources (e.g. employer build farms, CI systems, etc) when working on patches. +Early access +"""""""""""" + +The patches discussed and developed on the list can neither be distributed +to any individual who is not a member of the response team nor to any other +organization. + +To allow the affected silicon vendors to work with their internal teams and +industry partners on testing, validation, and logistics, the following +exception is provided: + + Designated representatives of the affected silicon vendors are + allowed to hand over the patches at any time to the silicon + vendor’s response team. The representative must notify the kernel + response team about the handover. The affected silicon vendor must + have and maintain their own documented security process for any + patches shared with their response team that is consistent with + this policy. + + The silicon vendor’s response team can distribute these patches to + their industry partners and to their internal teams under the + silicon vendor’s documented security process. Feedback from the + industry partners goes back to the silicon vendor and is + communicated by the silicon vendor to the kernel response team. + + The handover to the silicon vendor’s response team removes any + responsibility or liability from the kernel response team regarding + premature disclosure, which happens due to the involvement of the + silicon vendor’s internal teams or industry partners. The silicon + vendor guarantees this release of liability by agreeing to this + process. Coordinated release """"""""""""""""""" -The involved parties will negotiate the date and time where the embargo -ends. At that point the prepared mitigations are integrated into the -relevant kernel trees and published. There is no pre-notification process: -fixes are published in public and available to everyone at the same time. +The involved parties will negotiate the date and time when the embargo +ends. At that point, the prepared mitigations are published into the +relevant kernel trees. There is no pre-notification process: the +mitigations are published in public and available to everyone at the same +time. While we understand that hardware security issues need coordinated embargo -time, the embargo time should be constrained to the minimum time which is -required for all involved parties to develop, test and prepare the +time, the embargo time should be constrained to the minimum time that is +required for all involved parties to develop, test, and prepare their mitigations. Extending embargo time artificially to meet conference talk -dates or other non-technical reasons is creating more work and burden for -the involved developers and response teams as the patches need to be kept -up to date in order to follow the ongoing upstream kernel development, -which might create conflicting changes. +dates or other non-technical reasons creates more work and burden for the +involved developers and response teams as the patches need to be kept up to +date in order to follow the ongoing upstream kernel development, which +might create conflicting changes. CVE assignment """""""""""""" @@ -275,34 +313,35 @@ an involved disclosed party. The current ambassadors list: If you want your organization to be added to the ambassadors list, please contact the hardware security team. The nominated ambassador has to -understand and support our process fully and is ideally well connected in +understand and support our process fully and is ideally well-connected in the Linux kernel community. Encrypted mailing-lists ----------------------- -We use encrypted mailing-lists for communication. The operating principle +We use encrypted mailing lists for communication. The operating principle of these lists is that email sent to the list is encrypted either with the -list's PGP key or with the list's S/MIME certificate. The mailing-list +list's PGP key or with the list's S/MIME certificate. The mailing list software decrypts the email and re-encrypts it individually for each subscriber with the subscriber's PGP key or S/MIME certificate. Details -about the mailing-list software and the setup which is used to ensure the +about the mailing list software and the setup that is used to ensure the security of the lists and protection of the data can be found here: https://korg.wiki.kernel.org/userdoc/remail. List keys ^^^^^^^^^ -For initial contact see :ref:`Contact`. For incident specific mailing-lists -the key and S/MIME certificate are conveyed to the subscribers by email -sent from the specific list. +For initial contact see the :ref:`Contact` section above. For incident +specific mailing lists, the key and S/MIME certificate are conveyed to the +subscribers by email sent from the specific list. -Subscription to incident specific lists +Subscription to incident-specific lists ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Subscription is handled by the response teams. Disclosed parties who want -to participate in the communication send a list of potential subscribers to -the response team so the response team can validate subscription requests. +Subscription to incident-specific lists is handled by the response teams. +Disclosed parties who want to participate in the communication send a list +of potential experts to the response team so the response team can validate +subscription requests. Each subscriber needs to send a subscription request to the response team by email. The email must be signed with the subscriber's PGP key or S/MIME diff --git a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst index f02e6cf3516a2047ada5779b09d2f0441306d148..74df19be91f69dec49951566d13bb52fbb637988 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-yuv-luma.rst @@ -21,9 +21,9 @@ are often referred to as greyscale formats. .. raw:: latex - \scriptsize + \tiny -.. tabularcolumns:: |p{3.6cm}|p{3.0cm}|p{1.3cm}|p{2.6cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| +.. tabularcolumns:: |p{3.6cm}|p{2.4cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}|p{1.3cm}| .. flat-table:: Luma-Only Image Formats :header-rows: 1 diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index fe722c5dada9f81e189a3e87426fda3883bbbd06..b3be87489108e8a3ebac587361881632f53154f5 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2592,7 +2592,7 @@ Specifically: 0x6030 0000 0010 004a SPSR_ABT 64 spsr[KVM_SPSR_ABT] 0x6030 0000 0010 004c SPSR_UND 64 spsr[KVM_SPSR_UND] 0x6030 0000 0010 004e SPSR_IRQ 64 spsr[KVM_SPSR_IRQ] - 0x6060 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] + 0x6030 0000 0010 0050 SPSR_FIQ 64 spsr[KVM_SPSR_FIQ] 0x6040 0000 0010 0054 V0 128 fp_regs.vregs[0] [1]_ 0x6040 0000 0010 0058 V1 128 fp_regs.vregs[1] [1]_ ... @@ -6368,7 +6368,7 @@ a single guest_memfd file, but the bound ranges must not overlap). See KVM_SET_USER_MEMORY_REGION2 for additional details. 4.143 KVM_PRE_FAULT_MEMORY ------------------------- +--------------------------- :Capability: KVM_CAP_PRE_FAULT_MEMORY :Architectures: none @@ -6405,6 +6405,12 @@ for the current vCPU state. KVM maps memory as if the vCPU generated a stage-2 read page fault, e.g. faults in memory as needed, but doesn't break CoW. However, KVM does not mark any newly created stage-2 PTE as Accessed. +In the case of confidential VM types where there is an initial set up of +private guest memory before the guest is 'finalized'/measured, this ioctl +should only be issued after completing all the necessary setup to put the +guest into a 'finalized' state so that the above semantics can be reliably +ensured. + In some cases, multiple vCPUs might share the page tables. In this case, the ioctl can be called in parallel. diff --git a/Documentation/wmi/devices/msi-wmi-platform.rst b/Documentation/wmi/devices/msi-wmi-platform.rst index 29b1b2e6d42cdc2f37522c555cd1daa152c7d5f2..31a13694289238a04d9099718f55d6d86b06d678 100644 --- a/Documentation/wmi/devices/msi-wmi-platform.rst +++ b/Documentation/wmi/devices/msi-wmi-platform.rst @@ -130,12 +130,12 @@ data using the `bmfdec `_ utility: Due to a peculiarity in how Windows handles the ``CreateByteField()`` ACPI operator (errors only happen when a invalid byte field is ultimately accessed), all methods require a 32 byte input -buffer, even if the Binay MOF says otherwise. +buffer, even if the Binary MOF says otherwise. The input buffer contains a single byte to select the subfeature to be accessed and 31 bytes of input data, the meaning of which depends on the subfeature being accessed. -The output buffer contains a singe byte which signals success or failure (``0x00`` on failure) +The output buffer contains a single byte which signals success or failure (``0x00`` on failure) and 31 bytes of output data, the meaning if which depends on the subfeature being accessed. WMI method Get_EC() @@ -147,7 +147,7 @@ data contains a flag byte and a 28 byte controller firmware version string. The first 4 bits of the flag byte contain the minor version of the embedded controller interface, with the next 2 bits containing the major version of the embedded controller interface. -The 7th bit signals if the embedded controller page chaged (exact meaning is unknown), and the +The 7th bit signals if the embedded controller page changed (exact meaning is unknown), and the last bit signals if the platform is a Tigerlake platform. The MSI software seems to only use this interface when the last bit is set. diff --git a/MAINTAINERS b/MAINTAINERS index 42decde3832066e43a460a83b0634e4d9af0c797..878dcd23b3317b8ad25d2e36184213b191605a45 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3504,7 +3504,9 @@ S: Maintained W: http://linux-atm.sourceforge.net F: drivers/atm/ F: include/linux/atm* +F: include/linux/sonet.h F: include/uapi/linux/atm* +F: include/uapi/linux/sonet.h ATMEL MACB ETHERNET DRIVER M: Nicolas Ferre @@ -5306,7 +5308,7 @@ F: drivers/media/cec/i2c/ch7322.c CIRRUS LOGIC AUDIO CODEC DRIVERS M: David Rhodes M: Richard Fitzgerald -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Maintained F: Documentation/devicetree/bindings/sound/cirrus,cs* @@ -5375,7 +5377,7 @@ F: sound/soc/codecs/lochnagar-sc.c CIRRUS LOGIC MADERA CODEC DRIVERS M: Charles Keepax M: Richard Fitzgerald -L: alsa-devel@alsa-project.org (moderated for non-subscribers) +L: linux-sound@vger.kernel.org L: patches@opensource.cirrus.com S: Supported W: https://github.com/CirrusLogic/linux-drivers/wiki @@ -10173,7 +10175,7 @@ F: Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt F: drivers/infiniband/hw/hns/ HISILICON SAS Controller -M: Xiang Chen +M: Yihang Li S: Supported W: http://www.hisilicon.com F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt @@ -11993,7 +11995,7 @@ F: fs/jfs/ JME NETWORK DRIVER M: Guo-Fu Tseng L: netdev@vger.kernel.org -S: Maintained +S: Odd Fixes F: drivers/net/ethernet/jme.* JOURNALLING FLASH FILE SYSTEM V2 (JFFS2) @@ -13324,14 +13326,16 @@ F: Documentation/devicetree/bindings/i2c/i2c-mux-ltc4306.txt F: drivers/i2c/muxes/i2c-mux-ltc4306.c LTP (Linux Test Project) +M: Andrea Cervesato M: Cyril Hrubis M: Jan Stancek M: Petr Vorel M: Li Wang M: Yang Xu +M: Xiao Yang L: ltp@lists.linux.it (subscribers-only) S: Maintained -W: http://linux-test-project.github.io/ +W: https://linux-test-project.readthedocs.io/ T: git https://github.com/linux-test-project/ltp.git LTR390 AMBIENT/UV LIGHT SENSOR DRIVER @@ -13539,7 +13543,7 @@ MARVELL GIGABIT ETHERNET DRIVERS (skge/sky2) M: Mirko Lindner M: Stephen Hemminger L: netdev@vger.kernel.org -S: Maintained +S: Odd fixes F: drivers/net/ethernet/marvell/sk* MARVELL LIBERTAS WIRELESS DRIVER @@ -15875,15 +15879,19 @@ F: drivers/net/ F: include/dt-bindings/net/ F: include/linux/cn_proc.h F: include/linux/etherdevice.h +F: include/linux/ethtool_netlink.h F: include/linux/fcdevice.h F: include/linux/fddidevice.h F: include/linux/hippidevice.h F: include/linux/if_* F: include/linux/inetdevice.h -F: include/linux/netdevice.h +F: include/linux/netdev* +F: include/linux/platform_data/wiznet.h F: include/uapi/linux/cn_proc.h +F: include/uapi/linux/ethtool_netlink.h F: include/uapi/linux/if_* -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netdev* +F: tools/testing/selftests/drivers/net/ X: drivers/net/wireless/ NETWORKING DRIVERS (WIRELESS) @@ -15934,13 +15942,28 @@ F: include/linux/framer/framer-provider.h F: include/linux/framer/framer.h F: include/linux/in.h F: include/linux/indirect_call_wrapper.h +F: include/linux/inet.h +F: include/linux/inet_diag.h F: include/linux/net.h -F: include/linux/netdevice.h +F: include/linux/netdev* +F: include/linux/netlink.h +F: include/linux/netpoll.h +F: include/linux/rtnetlink.h +F: include/linux/seq_file_net.h +F: include/linux/skbuff* F: include/net/ +F: include/uapi/linux/genetlink.h +F: include/uapi/linux/hsr_netlink.h F: include/uapi/linux/in.h +F: include/uapi/linux/inet_diag.h +F: include/uapi/linux/nbd-netlink.h F: include/uapi/linux/net.h F: include/uapi/linux/net_namespace.h -F: include/uapi/linux/netdevice.h +F: include/uapi/linux/netconf.h +F: include/uapi/linux/netdev* +F: include/uapi/linux/netlink.h +F: include/uapi/linux/netlink_diag.h +F: include/uapi/linux/rtnetlink.h F: lib/net_utils.c F: lib/random32.c F: net/ @@ -18556,7 +18579,7 @@ F: drivers/usb/misc/qcom_eud.c QCOM IPA DRIVER M: Alex Elder L: netdev@vger.kernel.org -S: Supported +S: Maintained F: drivers/net/ipa/ QEMU MACHINE EMULATOR AND VIRTUALIZER SUPPORT @@ -20351,6 +20374,7 @@ F: Documentation/devicetree/bindings/scsi/ F: drivers/scsi/ F: drivers/ufs/ F: include/scsi/ +F: include/uapi/scsi/ SCSI TAPE DRIVER M: Kai Mäkisara @@ -21051,6 +21075,7 @@ SOCKET TIMESTAMPING M: Willem de Bruijn S: Maintained F: Documentation/networking/timestamping.rst +F: include/linux/net_tstamp.h F: include/uapi/linux/net_tstamp.h F: tools/testing/selftests/net/so_txtime.c diff --git a/Makefile b/Makefile index 8ad55d6e7b60f270f1806aa53376f2b5b9b5f93a..7b60eb103c5d3cfa0b87c1761231e77b61332e6d 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 6 PATCHLEVEL = 11 SUBLEVEL = 0 -EXTRAVERSION = -rc1 +EXTRAVERSION = -rc5 NAME = Baby Opossum Posse # *DOCUMENTATION* @@ -1963,7 +1963,7 @@ tags TAGS cscope gtags: FORCE # Protocol). PHONY += rust-analyzer rust-analyzer: - $(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh + +$(Q)$(CONFIG_SHELL) $(srctree)/scripts/rust_is_available.sh $(Q)$(MAKE) $(build)=rust $@ # Script to generate missing namespace dependencies @@ -1980,7 +1980,7 @@ nsdeps: modules quiet_cmd_gen_compile_commands = GEN $@ cmd_gen_compile_commands = $(PYTHON3) $< -a $(AR) -o $@ $(filter-out $<, $(real-prereqs)) -$(extmod_prefix)compile_commands.json: scripts/clang-tools/gen_compile_commands.py \ +$(extmod_prefix)compile_commands.json: $(srctree)/scripts/clang-tools/gen_compile_commands.py \ $(if $(KBUILD_EXTMOD),, vmlinux.a $(KBUILD_VMLINUX_LIBS)) \ $(if $(CONFIG_MODULES), $(MODORDER)) FORCE $(call if_changed,gen_compile_commands) diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index 2bb8cbeedf9160778a7bff1a444f8bc925612749..b191d87f89c4012849276b7b7b67c69ed974778a 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -534,8 +534,10 @@ extern inline void writeq(u64 b, volatile void __iomem *addr) #define ioread16be(p) swab16(ioread16(p)) #define ioread32be(p) swab32(ioread32(p)) +#define ioread64be(p) swab64(ioread64(p)) #define iowrite16be(v,p) iowrite16(swab16(v), (p)) #define iowrite32be(v,p) iowrite32(swab32(v), (p)) +#define iowrite64be(v,p) iowrite64(swab64(v), (p)) #define inb_p inb #define inw_p inw @@ -634,8 +636,6 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); */ #define ioread64 ioread64 #define iowrite64 iowrite64 -#define ioread64be ioread64be -#define iowrite64be iowrite64be #define ioread8_rep ioread8_rep #define ioread16_rep ioread16_rep #define ioread32_rep ioread32_rep diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 954a1916a5003bcbb60f3766fad981d6a0b4e8e2..54b2bb817a7fc0615ad94b88fd4698512f29c0c5 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -87,6 +87,7 @@ config ARM select HAVE_ARCH_PFN_VALID select HAVE_ARCH_SECCOMP select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT + select HAVE_ARCH_STACKLEAK select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE @@ -116,6 +117,7 @@ config ARM select HAVE_KERNEL_XZ select HAVE_KPROBES if !XIP_KERNEL && !CPU_ENDIAN_BE32 && !CPU_V7M select HAVE_KRETPROBES if HAVE_KPROBES + select HAVE_LD_DEAD_CODE_DATA_ELIMINATION select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_OPTPROBES if !THUMB2_KERNEL @@ -736,7 +738,7 @@ config ARM_ERRATA_764319 bool "ARM errata: Read to DBGPRSR and DBGOSLSR may generate Undefined instruction" depends on CPU_V7 help - This option enables the workaround for the 764319 Cortex A-9 erratum. + This option enables the workaround for the 764319 Cortex-A9 erratum. CP14 read accesses to the DBGPRSR and DBGOSLSR registers generate an unexpected Undefined Instruction exception when the DBGSWENABLE external pin is set to 0, even when the CP14 accesses are performed diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 6bca03c0c7f0e28e832187366d96b77ea0dfc614..945b5975fce20008c9c5ed15e2fb05894357efd9 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -9,6 +9,7 @@ OBJS = HEAD = head.o OBJS += misc.o decompress.o +CFLAGS_decompress.o += $(DISABLE_STACKLEAK_PLUGIN) ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y) OBJS += debug.o AFLAGS_head.o += -DDEBUG diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index 3fcb3e62dc5692c5ea8ccdb47d97c169d7b0ac76..d411abd4310ea97f14401cd73b21d77250af095f 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -125,7 +125,7 @@ SECTIONS . = BSS_START; __bss_start = .; - .bss : { *(.bss) } + .bss : { *(.bss .bss.*) } _end = .; . = ALIGN(8); /* the stack must be 64-bit aligned */ diff --git a/arch/arm/boot/dts/arm/versatile-ab.dts b/arch/arm/boot/dts/arm/versatile-ab.dts index 6fe6b49f5d8ecfe1266a36130864697c909f5c81..635ab92688999aa9f1dff02ba59860104d349b5f 100644 --- a/arch/arm/boot/dts/arm/versatile-ab.dts +++ b/arch/arm/boot/dts/arm/versatile-ab.dts @@ -157,7 +157,7 @@ clocks = <&xtal24mhz>; }; - pclk: clock-24000000 { + pclk: clock-pclk { #clock-cells = <0>; compatible = "fixed-factor-clock"; clock-div = <1>; diff --git a/arch/arm/include/asm/stacktrace.h b/arch/arm/include/asm/stacktrace.h index 360f0d2406bfa08364086e885157a3a02459c277..f80a85b091d6c4ff365e15ae7100af1c3aed597f 100644 --- a/arch/arm/include/asm/stacktrace.h +++ b/arch/arm/include/asm/stacktrace.h @@ -26,6 +26,13 @@ struct stackframe { #endif }; +static inline bool on_thread_stack(void) +{ + unsigned long delta = current_stack_pointer ^ (unsigned long)current->stack; + + return delta < THREAD_SIZE; +} + static __always_inline void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame) { diff --git a/arch/arm/include/asm/vmlinux.lds.h b/arch/arm/include/asm/vmlinux.lds.h index 4c8632d5c432c3c7e9351ebae65a90ee9a4c31db..d60f6e83a9f700f3fc5fe1e08b09c7263f49bf79 100644 --- a/arch/arm/include/asm/vmlinux.lds.h +++ b/arch/arm/include/asm/vmlinux.lds.h @@ -42,7 +42,7 @@ #define PROC_INFO \ . = ALIGN(4); \ __proc_info_begin = .; \ - *(.proc.info.init) \ + KEEP(*(.proc.info.init)) \ __proc_info_end = .; #define IDMAP_TEXT \ diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 6150a716828c3eca335ed52df75fe7fdcf40eb57..f01d23a220e65d20789a7aeebeffb629fcaf126b 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1065,6 +1065,7 @@ vector_addrexcptn: .globl vector_fiq .section .vectors, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_swi ) @@ -1078,6 +1079,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_swi ) #ifdef CONFIG_HARDEN_BRANCH_HISTORY .section .vectors.bhb.loop8, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_loop8_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_loop8_swi ) @@ -1090,6 +1092,7 @@ THUMB( .reloc ., R_ARM_THM_PC12, .L__vector_bhb_loop8_swi ) W(b) vector_bhb_loop8_fiq .section .vectors.bhb.bpiall, "ax", %progbits + .reloc .text, R_ARM_NONE, . W(b) vector_rst W(b) vector_bhb_bpiall_und ARM( .reloc ., R_ARM_LDR_PC_G0, .L__vector_bhb_bpiall_swi ) diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 5c31e9de7a602e22a65f64c12cf2819be9fa56d6..f379c852dcb72fa5e960bba5621cc2152f0df0d3 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -119,6 +119,9 @@ no_work_pending: ct_user_enter save = 0 +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK + bl stackleak_erase_on_task_stack +#endif restore_user_regs fast = 0, offset = 0 ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index 677f218f7e841acc1f7c392a3216e2bd848413e8..da488d92e7a00d0230d7b10c623a57773371ab3f 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -395,11 +395,6 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, return 0; } -struct mod_unwind_map { - const Elf_Shdr *unw_sec; - const Elf_Shdr *txt_sec; -}; - static const Elf_Shdr *find_mod_section(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, const char *name) { diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c index 7147edbe56c67c41915c5fa8ae11a3b5493261d3..1d230ac9d0eb5c86fd5d0516b9b4b07c1f9b173b 100644 --- a/arch/arm/kernel/perf_callchain.c +++ b/arch/arm/kernel/perf_callchain.c @@ -85,8 +85,7 @@ static bool callchain_trace(void *data, unsigned long pc) { struct perf_callchain_entry_ctx *entry = data; - perf_callchain_store(entry, pc); - return true; + return perf_callchain_store(entry, pc) == 0; } void diff --git a/arch/arm/kernel/vmlinux-xip.lds.S b/arch/arm/kernel/vmlinux-xip.lds.S index c16d196b5aad3e159c160295fd21c75c3cb7055b..5eddb75a7174083f7bf143798f339171540c8475 100644 --- a/arch/arm/kernel/vmlinux-xip.lds.S +++ b/arch/arm/kernel/vmlinux-xip.lds.S @@ -63,7 +63,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -83,7 +83,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index bd9127c4b451015bb67b64cc273ff10ebb68a398..de373c6c2ae8780f190bef7357c4d70f3b3064cc 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS . = ALIGN(4); __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { __start___ex_table = .; - ARM_MMU_KEEP(*(__ex_table)) + ARM_MMU_KEEP(KEEP(*(__ex_table))) __stop___ex_table = .; } @@ -99,7 +99,7 @@ SECTIONS } .init.arch.info : { __arch_info_begin = .; - *(.arch.info.init) + KEEP(*(.arch.info.init)) __arch_info_end = .; } .init.tagtable : { @@ -116,7 +116,7 @@ SECTIONS #endif .init.pv_table : { __pv_table_begin = .; - *(.pv_table) + KEEP(*(.pv_table)) __pv_table_end = .; } diff --git a/arch/arm/mach-alpine/alpine_cpu_pm.c b/arch/arm/mach-alpine/alpine_cpu_pm.c index 13ae8412e9ce6451cbf1820c07db66425679d65b..b48da6f12b6cc46a9b619bfeb8630ea94dc6f21e 100644 --- a/arch/arm/mach-alpine/alpine_cpu_pm.c +++ b/arch/arm/mach-alpine/alpine_cpu_pm.c @@ -29,7 +29,7 @@ int alpine_cpu_wakeup(unsigned int phys_cpu, uint32_t phys_resume_addr) /* * Set CPU resume address - * secure firmware running on boot will jump to this address - * after setting proper CPU mode, and initialiing e.g. secure + * after setting proper CPU mode, and initializing e.g. secure * regs (the same mode all CPUs are booted to - usually HYP) */ writel(phys_resume_addr, diff --git a/arch/arm/mach-pxa/gumstix.c b/arch/arm/mach-pxa/gumstix.c index efa6faa62a2cf0cefbfb3abd450bc176d0f49dba..1713bdf3b71e4b7541de751e32c1dd523ddf71fd 100644 --- a/arch/arm/mach-pxa/gumstix.c +++ b/arch/arm/mach-pxa/gumstix.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -40,6 +41,7 @@ #include #include "udc.h" #include "gumstix.h" +#include "devices.h" #include "generic.h" @@ -99,8 +101,8 @@ static void __init gumstix_mmc_init(void) } #endif -#ifdef CONFIG_USB_PXA25X -static const struct property_entry spitz_mci_props[] __initconst = { +#if IS_ENABLED(CONFIG_USB_PXA25X) +static const struct property_entry gumstix_vbus_props[] __initconst = { PROPERTY_ENTRY_GPIO("vbus-gpios", &pxa2xx_gpiochip_node, GPIO_GUMSTIX_USB_GPIOn, GPIO_ACTIVE_HIGH), PROPERTY_ENTRY_GPIO("pullup-gpios", &pxa2xx_gpiochip_node, @@ -109,8 +111,9 @@ static const struct property_entry spitz_mci_props[] __initconst = { }; static const struct platform_device_info gumstix_gpio_vbus_info __initconst = { - .name = "gpio-vbus", - .id = PLATFORM_DEVID_NONE, + .name = "gpio-vbus", + .id = PLATFORM_DEVID_NONE, + .properties = gumstix_vbus_props, }; static void __init gumstix_udc_init(void) diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index c30df1097c524b4c701834e093775662802d9341..9f7454b8efa79e9c3c1add236242326d0e946710 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -1109,7 +1109,7 @@ void ecard_remove_driver(struct ecard_driver *drv) driver_unregister(&drv->drv); } -static int ecard_match(struct device *_dev, struct device_driver *_drv) +static int ecard_match(struct device *_dev, const struct device_driver *_drv) { struct expansion_card *ec = ECARD_DEV(_dev); struct ecard_driver *drv = ECARD_DRV(_drv); diff --git a/arch/arm/mm/proc.c b/arch/arm/mm/proc.c index bdbbf65d1b3669c5e05708b20c166683fb8c8f41..2027845efefb608fb4bb1bff1a744dc1ae0b8e0a 100644 --- a/arch/arm/mm/proc.c +++ b/arch/arm/mm/proc.c @@ -17,7 +17,7 @@ void cpu_arm7tdmi_proc_init(void); __ADDRESSABLE(cpu_arm7tdmi_proc_init); void cpu_arm7tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm7tdmi_proc_fin); -void cpu_arm7tdmi_reset(void); +void cpu_arm7tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm7tdmi_reset); int cpu_arm7tdmi_do_idle(void); __ADDRESSABLE(cpu_arm7tdmi_do_idle); @@ -32,7 +32,7 @@ void cpu_arm720_proc_init(void); __ADDRESSABLE(cpu_arm720_proc_init); void cpu_arm720_proc_fin(void); __ADDRESSABLE(cpu_arm720_proc_fin); -void cpu_arm720_reset(void); +void cpu_arm720_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm720_reset); int cpu_arm720_do_idle(void); __ADDRESSABLE(cpu_arm720_do_idle); @@ -49,7 +49,7 @@ void cpu_arm740_proc_init(void); __ADDRESSABLE(cpu_arm740_proc_init); void cpu_arm740_proc_fin(void); __ADDRESSABLE(cpu_arm740_proc_fin); -void cpu_arm740_reset(void); +void cpu_arm740_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm740_reset); int cpu_arm740_do_idle(void); __ADDRESSABLE(cpu_arm740_do_idle); @@ -64,7 +64,7 @@ void cpu_arm9tdmi_proc_init(void); __ADDRESSABLE(cpu_arm9tdmi_proc_init); void cpu_arm9tdmi_proc_fin(void); __ADDRESSABLE(cpu_arm9tdmi_proc_fin); -void cpu_arm9tdmi_reset(void); +void cpu_arm9tdmi_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm9tdmi_reset); int cpu_arm9tdmi_do_idle(void); __ADDRESSABLE(cpu_arm9tdmi_do_idle); @@ -79,7 +79,7 @@ void cpu_arm920_proc_init(void); __ADDRESSABLE(cpu_arm920_proc_init); void cpu_arm920_proc_fin(void); __ADDRESSABLE(cpu_arm920_proc_fin); -void cpu_arm920_reset(void); +void cpu_arm920_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm920_reset); int cpu_arm920_do_idle(void); __ADDRESSABLE(cpu_arm920_do_idle); @@ -102,7 +102,7 @@ void cpu_arm922_proc_init(void); __ADDRESSABLE(cpu_arm922_proc_init); void cpu_arm922_proc_fin(void); __ADDRESSABLE(cpu_arm922_proc_fin); -void cpu_arm922_reset(void); +void cpu_arm922_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm922_reset); int cpu_arm922_do_idle(void); __ADDRESSABLE(cpu_arm922_do_idle); @@ -119,7 +119,7 @@ void cpu_arm925_proc_init(void); __ADDRESSABLE(cpu_arm925_proc_init); void cpu_arm925_proc_fin(void); __ADDRESSABLE(cpu_arm925_proc_fin); -void cpu_arm925_reset(void); +void cpu_arm925_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm925_reset); int cpu_arm925_do_idle(void); __ADDRESSABLE(cpu_arm925_do_idle); @@ -159,7 +159,7 @@ void cpu_arm940_proc_init(void); __ADDRESSABLE(cpu_arm940_proc_init); void cpu_arm940_proc_fin(void); __ADDRESSABLE(cpu_arm940_proc_fin); -void cpu_arm940_reset(void); +void cpu_arm940_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm940_reset); int cpu_arm940_do_idle(void); __ADDRESSABLE(cpu_arm940_do_idle); @@ -174,7 +174,7 @@ void cpu_arm946_proc_init(void); __ADDRESSABLE(cpu_arm946_proc_init); void cpu_arm946_proc_fin(void); __ADDRESSABLE(cpu_arm946_proc_fin); -void cpu_arm946_reset(void); +void cpu_arm946_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_arm946_reset); int cpu_arm946_do_idle(void); __ADDRESSABLE(cpu_arm946_do_idle); @@ -429,7 +429,7 @@ void cpu_v7_proc_init(void); __ADDRESSABLE(cpu_v7_proc_init); void cpu_v7_proc_fin(void); __ADDRESSABLE(cpu_v7_proc_fin); -void cpu_v7_reset(void); +void cpu_v7_reset(unsigned long addr, bool hvc); __ADDRESSABLE(cpu_v7_reset); int cpu_v7_do_idle(void); __ADDRESSABLE(cpu_v7_do_idle); diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b3fc891f154429d8c56c99f1cccddfd78a43f0b8..a2f8ff354ca670af50e44c790c6c201b63aa0ddb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1069,18 +1069,28 @@ config ARM64_ERRATUM_3117295 If unsure, say Y. config ARM64_ERRATUM_3194386 - bool "Cortex-{A720,X4,X925}/Neoverse-V3: workaround for MSR SSBS not self-synchronizing" + bool "Cortex-*/Neoverse-*: workaround for MSR SSBS not self-synchronizing" default y help This option adds the workaround for the following errata: + * ARM Cortex-A76 erratum 3324349 + * ARM Cortex-A77 erratum 3324348 + * ARM Cortex-A78 erratum 3324344 + * ARM Cortex-A78C erratum 3324346 + * ARM Cortex-A78C erratum 3324347 * ARM Cortex-A710 erratam 3324338 * ARM Cortex-A720 erratum 3456091 + * ARM Cortex-A725 erratum 3456106 + * ARM Cortex-X1 erratum 3324344 + * ARM Cortex-X1C erratum 3324346 * ARM Cortex-X2 erratum 3324338 * ARM Cortex-X3 erratum 3324335 * ARM Cortex-X4 erratum 3194386 * ARM Cortex-X925 erratum 3324334 + * ARM Neoverse-N1 erratum 3324349 * ARM Neoverse N2 erratum 3324339 + * ARM Neoverse-V1 erratum 3324341 * ARM Neoverse V2 erratum 3324336 * ARM Neoverse-V3 erratum 3312417 @@ -1088,11 +1098,11 @@ config ARM64_ERRATUM_3194386 subsequent speculative instructions, which may permit unexepected speculative store bypassing. - Work around this problem by placing a speculation barrier after - kernel changes to SSBS. The presence of the SSBS special-purpose - register is hidden from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such - that userspace will use the PR_SPEC_STORE_BYPASS prctl to change - SSBS. + Work around this problem by placing a Speculation Barrier (SB) or + Instruction Synchronization Barrier (ISB) after kernel changes to + SSBS. The presence of the SSBS special-purpose register is hidden + from hwcaps and EL0 reads of ID_AA64PFR1_EL1, such that userspace + will use the PR_SPEC_STORE_BYPASS prctl to change SSBS. If unsure, say Y. diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi index e8f4d136e5dfb4dd67c16f450141ebf9cd498b24..9202181fbd6528e92f6f440bb0f48dc4fde2674b 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin-dahlia.dtsi @@ -43,15 +43,6 @@ sound-dai = <&mcasp0>; }; }; - - reg_usb_hub: regulator-usb-hub { - compatible = "regulator-fixed"; - enable-active-high; - /* Verdin CTRL_SLEEP_MOCI# (SODIMM 256) */ - gpio = <&main_gpio0 31 GPIO_ACTIVE_HIGH>; - regulator-boot-on; - regulator-name = "HUB_PWR_EN"; - }; }; /* Verdin ETHs */ @@ -193,11 +184,6 @@ status = "okay"; }; -/* Do not force CTRL_SLEEP_MOCI# always enabled */ -®_force_sleep_moci { - status = "disabled"; -}; - /* Verdin SD_1 */ &sdhci1 { status = "okay"; @@ -218,15 +204,7 @@ }; &usb1 { - #address-cells = <1>; - #size-cells = <0>; status = "okay"; - - usb-hub@1 { - compatible = "usb424,2744"; - reg = <1>; - vdd-supply = <®_usb_hub>; - }; }; /* Verdin CTRL_WAKE1_MICO# */ diff --git a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi index 359f53f3e019b1dbdf01017bff1a42af0161bf44..5bef31b8577be52133a4e151727ba8579d0e51ae 100644 --- a/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62-verdin.dtsi @@ -138,12 +138,6 @@ vin-supply = <®_1v8>; }; - /* - * By default we enable CTRL_SLEEP_MOCI#, this is required to have - * peripherals on the carrier board powered. - * If more granularity or power saving is required this can be disabled - * in the carrier board device tree files. - */ reg_force_sleep_moci: regulator-force-sleep-moci { compatible = "regulator-fixed"; enable-active-high; diff --git a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi index e65db6ce02bf668ace388a7a547563584056d1d9..df7945156397b14015661193c93a006308501bed 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-j722s-common-mcu.dtsi @@ -146,6 +146,8 @@ power-domains = <&k3_pds 79 TI_SCI_PD_EXCLUSIVE>; clocks = <&k3_clks 79 0>; clock-names = "gpio"; + gpio-ranges = <&mcu_pmx0 0 0 21>, <&mcu_pmx0 21 23 1>, + <&mcu_pmx0 22 32 2>; }; mcu_rti0: watchdog@4880000 { diff --git a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi index 57383bd2eaeb41cf6360411e6a08fc2fdd834bb1..0ce9721b417653850c08e8a57267bdafef66b38c 100644 --- a/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am62p-main.dtsi @@ -45,7 +45,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 92 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 22 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 149 2 PIN_GPIO_RANGE_IOPAD>; diff --git a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi index c797980528ec7c10a59703374b920d3c9ef91c9a..dde4bd5c664539dc8b24ed4d9e05d09e1bf07ba4 100644 --- a/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j722s-main.dtsi @@ -193,7 +193,8 @@ &main_pmx0 { pinctrl-single,gpio-range = <&main_pmx0_range 0 32 PIN_GPIO_RANGE_IOPAD>, - <&main_pmx0_range 33 55 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 33 38 PIN_GPIO_RANGE_IOPAD>, + <&main_pmx0_range 72 17 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 101 25 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 137 5 PIN_GPIO_RANGE_IOPAD>, <&main_pmx0_range 143 3 PIN_GPIO_RANGE_IOPAD>, diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts index 9338d987180d7560e56e4b720b50fc208480699f..ffa38f41679d8436f570682dc6133c53f51bbef3 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts +++ b/arch/arm64/boot/dts/ti/k3-j784s4-evm.dts @@ -1262,6 +1262,14 @@ &serdes0 { status = "okay"; + serdes0_pcie1_link: phy@0 { + reg = <0>; + cdns,num-lanes = <2>; + #phy-cells = <0>; + cdns,phy-type = ; + resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>; + }; + serdes0_usb_link: phy@3 { reg = <3>; cdns,num-lanes = <1>; @@ -1386,23 +1394,6 @@ phys = <&transceiver3>; }; -&serdes0 { - status = "okay"; - - serdes0_pcie1_link: phy@0 { - reg = <0>; - cdns,num-lanes = <4>; - #phy-cells = <0>; - cdns,phy-type = ; - resets = <&serdes_wiz0 1>, <&serdes_wiz0 2>, - <&serdes_wiz0 3>, <&serdes_wiz0 4>; - }; -}; - -&serdes_wiz0 { - status = "okay"; -}; - &pcie1_rc { status = "okay"; num-lanes = <2>; diff --git a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi index f170f80f00c1b84bea97d243e51866777bd84376..d4ac1c9872a5e70ff9bf9e3f6146d92fce780a34 100644 --- a/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi +++ b/arch/arm64/boot/dts/ti/k3-j784s4-main.dtsi @@ -2755,7 +2755,7 @@ interrupts = , ; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc500>, <&main_udmap 0x4500>; + dmas = <&main_udmap 0xc403>, <&main_udmap 0x4403>; dma-names = "tx", "rx"; clocks = <&k3_clks 268 0>; clock-names = "fck"; @@ -2773,7 +2773,7 @@ interrupts = , ; interrupt-names = "tx", "rx"; - dmas = <&main_udmap 0xc501>, <&main_udmap 0x4501>; + dmas = <&main_udmap 0xc404>, <&main_udmap 0x4404>; dma-names = "tx", "rx"; clocks = <&k3_clks 269 0>; clock-names = "fck"; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 1cb0704c6163f8ab16420f0aff6b5429688b96f9..5fd7caea441936246ce9ddb7b5d7bdf04b30d14b 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -86,12 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C #define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F #define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 #define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -165,12 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) #define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) #define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) #define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/jump_label.h b/arch/arm64/include/asm/jump_label.h index 4e753908b80188f0e58b50a0048e1482e32ec94f..a0a5bbae7229e8a791748ebaa193c1ccd118ff1c 100644 --- a/arch/arm64/include/asm/jump_label.h +++ b/arch/arm64/include/asm/jump_label.h @@ -13,6 +13,7 @@ #include #include +#define HAVE_JUMP_LABEL_BATCH #define JUMP_LABEL_NOP_SIZE AARCH64_INSN_SIZE #define JUMP_TABLE_ENTRY(key, label) \ diff --git a/arch/arm64/include/asm/kvm_ptrauth.h b/arch/arm64/include/asm/kvm_ptrauth.h index d81bac256abc3599e2603ce4d15d2c47a3d6cf6a..6199c9f7ec6eda2fa7c677cf4a755928eb747e84 100644 --- a/arch/arm64/include/asm/kvm_ptrauth.h +++ b/arch/arm64/include/asm/kvm_ptrauth.h @@ -104,7 +104,7 @@ alternative_else_nop_endif #define __ptrauth_save_key(ctxt, key) \ do { \ - u64 __val; \ + u64 __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYLO_EL1); \ ctxt_sys_reg(ctxt, key ## KEYLO_EL1) = __val; \ __val = read_sysreg_s(SYS_ ## key ## KEYHI_EL1); \ diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index 28f665e0975a28cf403ae7a19ce63add56e4d94c..1aa4ecb73429fed92d3863e18e559099cdd913bf 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -188,7 +188,7 @@ static inline void __user *__uaccess_mask_ptr(const void __user *ptr) #define __get_mem_asm(load, reg, x, addr, label, type) \ asm_goto_output( \ "1: " load " " reg "0, [%1]\n" \ - _ASM_EXTABLE_##type##ACCESS_ERR(1b, %l2, %w0) \ + _ASM_EXTABLE_##type##ACCESS(1b, %l2) \ : "=r" (x) \ : "r" (addr) : : label) #else diff --git a/arch/arm64/kernel/Makefile.syscalls b/arch/arm64/kernel/Makefile.syscalls index 3cfafd003b2db6868f234bd53240aa6e6f715b0c..0542a718871afa712446f8c674e680e9d187bb09 100644 --- a/arch/arm64/kernel/Makefile.syscalls +++ b/arch/arm64/kernel/Makefile.syscalls @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += -syscall_abis_64 += renameat newstat rlimit memfd_secret +syscall_abis_64 += renameat rlimit memfd_secret syscalltbl = arch/arm64/tools/syscall_%.tbl diff --git a/arch/arm64/kernel/acpi_numa.c b/arch/arm64/kernel/acpi_numa.c index 0c036a9a3c338ae81e0e31626c54948f227e312d..2465f291c7e17cc8e1d02b3a23fb96cf5e71a22c 100644 --- a/arch/arm64/kernel/acpi_numa.c +++ b/arch/arm64/kernel/acpi_numa.c @@ -27,7 +27,7 @@ #include -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 617424b73f8c3251aba6a8a1680029dc39ac09d8..f6b6b4507357158c8e3a26881f7ceca1cdd20646 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -434,15 +434,24 @@ static const struct midr_range erratum_spec_unpriv_load_list[] = { #ifdef CONFIG_ARM64_ERRATUM_3194386 static const struct midr_range erratum_spec_ssbs_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A77), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), MIDR_ALL_VERSIONS(MIDR_CORTEX_A710), MIDR_ALL_VERSIONS(MIDR_CORTEX_A720), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A725), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1C), MIDR_ALL_VERSIONS(MIDR_CORTEX_X2), MIDR_ALL_VERSIONS(MIDR_CORTEX_X3), MIDR_ALL_VERSIONS(MIDR_CORTEX_X4), MIDR_ALL_VERSIONS(MIDR_CORTEX_X925), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N2), - MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V2), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V3), {} }; #endif diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c index faf88ec9c48e8a91487329fffb7f9d20fd3b6854..f63ea915d6ad250d56d20ac5e95ec203a641974c 100644 --- a/arch/arm64/kernel/jump_label.c +++ b/arch/arm64/kernel/jump_label.c @@ -7,11 +7,12 @@ */ #include #include +#include #include #include -void arch_jump_label_transform(struct jump_entry *entry, - enum jump_label_type type) +bool arch_jump_label_transform_queue(struct jump_entry *entry, + enum jump_label_type type) { void *addr = (void *)jump_entry_code(entry); u32 insn; @@ -25,4 +26,10 @@ void arch_jump_label_transform(struct jump_entry *entry, } aarch64_insn_patch_text_nosync(addr, insn); + return true; +} + +void arch_jump_label_transform_apply(void) +{ + kick_all_cpus_sync(); } diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index a096e2451044d35bad506ecb67edcf3f2d30b87c..b22d28ec80284b1c4f593a4ffc51ed2b7012f7a3 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -355,9 +355,6 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) smp_init_cpus(); smp_build_mpidr_hash(); - /* Init percpu seeds for random tags after cpus are set up. */ - kasan_init_sw_tags(); - #ifdef CONFIG_ARM64_SW_TTBR0_PAN /* * Make sure init_thread_info.ttbr0 always generates translation diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 5e18fbcee9a20cb3605b1421b32343613fde0dfd..f01f0fd7b7feb4a98c09d9184a703f5e76a89ce6 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -467,6 +467,8 @@ void __init smp_prepare_boot_cpu(void) init_gic_priority_masking(); kasan_init_hw_tags(); + /* Init percpu seeds for random tags after cpus are set up. */ + kasan_init_sw_tags(); } /* diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 58f09370d17e017614c690000f55e9b3b816d77c..8304eb342be9d99687b16c549a0933caf7060aca 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -19,6 +19,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" + depends on AS_HAS_ARMV8_4 select KVM_COMMON select KVM_GENERIC_HARDWARE_ENABLING select KVM_GENERIC_MMU_NOTIFIER diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a6497228c5a8c37d9159a6f99b162b22f18192ca..86a629aaf0a13fe59e28f8a7ad35d7269ec0dfd6 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -10,6 +10,9 @@ include $(srctree)/virt/kvm/Makefile.kvm obj-$(CONFIG_KVM) += kvm.o obj-$(CONFIG_KVM) += hyp/ +CFLAGS_sys_regs.o += -Wno-override-init +CFLAGS_handle_exit.o += -Wno-override-init + kvm-y += arm.o mmu.o mmio.o psci.o hypercalls.o pvtime.o \ inject_fault.o va_layout.o handle_exit.o \ guest.o debug.o reset.o sys_regs.o stacktrace.o \ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index a7ca776b51ec8a1425d457206f35811eb26fa77c..9bef7638342ef748d10277c905091e458bdff2af 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -164,6 +164,7 @@ static int kvm_arm_default_max_vcpus(void) /** * kvm_arch_init_vm - initializes a VM data structure * @kvm: pointer to the KVM struct + * @type: kvm device type */ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { @@ -521,10 +522,10 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) { - if (vcpu_has_ptrauth(vcpu)) { + if (vcpu_has_ptrauth(vcpu) && !is_protected_kvm_enabled()) { /* - * Either we're running running an L2 guest, and the API/APK - * bits come from L1's HCR_EL2, or API/APK are both set. + * Either we're running an L2 guest, and the API/APK bits come + * from L1's HCR_EL2, or API/APK are both set. */ if (unlikely(vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu))) { u64 val; @@ -541,16 +542,10 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu) * Save the host keys if there is any chance for the guest * to use pauth, as the entry code will reload the guest * keys in that case. - * Protected mode is the exception to that rule, as the - * entry into the EL2 code eagerly switch back and forth - * between host and hyp keys (and kvm_hyp_ctxt is out of - * reach anyway). */ - if (is_protected_kvm_enabled()) - return; - if (vcpu->arch.hcr_el2 & (HCR_API | HCR_APK)) { struct kvm_cpu_context *ctxt; + ctxt = this_cpu_ptr_hyp_sym(kvm_hyp_ctxt); ptrauth_save_keys(ctxt); } diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index f59ccfe11ab9ad473d96c13cb3da4dfc18a043b5..37ff87d782b62bdef9ccd312537079283bf5654d 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -27,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 782b34b004be3c39a35e597dd8429b12f9a3976b..b43426a493df5a388caa920e259cc8c54d118a1b 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -20,6 +20,8 @@ HOST_EXTRACFLAGS += -I$(objtree)/include lib-objs := clear_page.o copy_page.o memcpy.o memset.o lib-objs := $(addprefix ../../../lib/, $(lib-objs)) +CFLAGS_switch.nvhe.o += -Wno-override-init + hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o page_alloc.o \ cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 6af179c6356d666b68e426429572ce6d559bbd82..8f5c56d5b1cdf5c0d65774a8714a79ac95cc709f 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -173,9 +173,8 @@ static void __pmu_switch_to_host(struct kvm_vcpu *vcpu) static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) { /* - * Make sure we handle the exit for workarounds and ptrauth - * before the pKVM handling, as the latter could decide to - * UNDEF. + * Make sure we handle the exit for workarounds before the pKVM + * handling, as the latter could decide to UNDEF. */ return (kvm_hyp_handle_sysreg(vcpu, exit_code) || kvm_handle_pvm_sysreg(vcpu, exit_code)); diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 3b9e5464b5b39ca07633751f2eea412c03d672f9..afc4aed9231ac0518b84131ec0bae4289eefbd1b 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -6,6 +6,8 @@ asflags-y := -D__KVM_VHE_HYPERVISOR__ ccflags-y := -D__KVM_VHE_HYPERVISOR__ +CFLAGS_switch.o += -Wno-override-init + obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 6981b1bc094686e8bea69fc121247ff8150f892f..a509b63bd4dd50d462e779f5511e6b06687cf73a 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1540,8 +1540,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, vma_pagesize = min(vma_pagesize, (long)max_map_size); } - if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) + /* + * Both the canonical IPA and fault IPA must be hugepage-aligned to + * ensure we find the right PFN and lay down the mapping in the right + * place. + */ + if (vma_pagesize == PMD_SIZE || vma_pagesize == PUD_SIZE) { fault_ipa &= ~(vma_pagesize - 1); + ipa &= ~(vma_pagesize - 1); + } gfn = ipa >> PAGE_SHIFT; mte_allowed = kvm_vma_mte_allowed(vma); diff --git a/arch/arm64/kvm/nested.c b/arch/arm64/kvm/nested.c index de789e0f1ae9cb6e1bc7ba0bd7bfe691df0143af..bab27f9d8cc659531291830ec79c13eb62aac4d0 100644 --- a/arch/arm64/kvm/nested.c +++ b/arch/arm64/kvm/nested.c @@ -786,7 +786,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm) if (!WARN_ON(atomic_read(&mmu->refcnt))) kvm_free_stage2_pgd(mmu); } - kfree(kvm->arch.nested_mmus); + kvfree(kvm->arch.nested_mmus); kvm->arch.nested_mmus = NULL; kvm->arch.nested_mmus_size = 0; kvm_uninit_stage2_mmu(kvm); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index c90324060436b272fd0f7dde8e023a7911edcc7b..31e49da867ffc32ecb66bf30bd0e667ef441170f 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -33,6 +33,7 @@ #include #include "sys_regs.h" +#include "vgic/vgic.h" #include "trace.h" @@ -435,6 +436,11 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, { bool g1; + if (!kvm_has_gicv3(vcpu->kvm)) { + kvm_inject_undefined(vcpu); + return false; + } + if (!p->is_write) return read_from_write_only(vcpu, p, r); diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c index bcbc8c986b1d63e2942869d66cd33cbfca5039ac..e1397ab2072a57b8357ec73bfe7574841a74e5ef 100644 --- a/arch/arm64/kvm/vgic/vgic-debug.c +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -45,7 +45,8 @@ static void iter_next(struct kvm *kvm, struct vgic_state_iter *iter) * Let the xarray drive the iterator after the last SPI, as the iterator * has exhausted the sequentially-allocated INTID space. */ - if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1)) { + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS - 1) && + iter->nr_lpis) { if (iter->lpi_idx < iter->nr_lpis) xa_find_after(&dist->lpi_xa, &iter->intid, VGIC_LPI_MAX_INTID, @@ -84,7 +85,7 @@ static void iter_unmark_lpis(struct kvm *kvm) struct vgic_irq *irq; unsigned long intid; - xa_for_each(&dist->lpi_xa, intid, irq) { + xa_for_each_marked(&dist->lpi_xa, intid, irq, LPI_XA_MARK_DEBUG_ITER) { xa_clear_mark(&dist->lpi_xa, intid, LPI_XA_MARK_DEBUG_ITER); vgic_put_irq(kvm, irq); } @@ -112,7 +113,7 @@ static bool end_of_vgic(struct vgic_state_iter *iter) return iter->dist_id > 0 && iter->vcpu_id == iter->nr_cpus && iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && - iter->lpi_idx > iter->nr_lpis; + (!iter->nr_lpis || iter->lpi_idx > iter->nr_lpis); } static void *vgic_debug_start(struct seq_file *s, loff_t *pos) diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 7f68cf58b978fb11c78b4b0bb6d9c15a0625c45b..e7c53e8af3d165379a609cbac4cc8ef2bd3be6fd 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -417,10 +417,8 @@ static void __kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->private_irqs); vgic_cpu->private_irqs = NULL; - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { - vgic_unregister_redist_iodev(vcpu); + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; - } } void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) @@ -438,17 +436,21 @@ void kvm_vgic_destroy(struct kvm *kvm) unsigned long i; mutex_lock(&kvm->slots_lock); + mutex_lock(&kvm->arch.config_lock); vgic_debug_destroy(kvm); kvm_for_each_vcpu(i, vcpu, kvm) __kvm_vgic_vcpu_destroy(vcpu); - mutex_lock(&kvm->arch.config_lock); - kvm_vgic_dist_destroy(kvm); mutex_unlock(&kvm->arch.config_lock); + + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + kvm_for_each_vcpu(i, vcpu, kvm) + vgic_unregister_redist_iodev(vcpu); + mutex_unlock(&kvm->slots_lock); } diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 8c711deb25aa006bc234f59de60cfcc015ed46b7..c314c016659abe6688d54ada383ad2cfb1495e8a 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -9,7 +9,7 @@ #include #include "vgic.h" -/** +/* * vgic_irqfd_set_irq: inject the IRQ corresponding to the * irqchip routing entry * @@ -75,7 +75,8 @@ static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, msi->flags = e->msi.flags; msi->devid = e->msi.devid; } -/** + +/* * kvm_set_msi: inject the MSI corresponding to the * MSI routing entry * @@ -98,7 +99,7 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, return vgic_its_inject_msi(kvm, &msi); } -/** +/* * kvm_arch_set_irq_inatomic: fast-path for irqfd injection */ int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 40bb43f20bf34794204fb6c4008316b817770e9a..ba945ba78cc7d7e749af5ae932d63a4bfcf6f643 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2040,6 +2040,7 @@ typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, * @start_id: the ID of the first entry in the table * (non zero for 2d level tables) * @fn: function to apply on each entry + * @opaque: pointer to opaque data * * Return: < 0 on error, 0 if last element was identified, 1 otherwise * (the last element may not be found on second level tables) @@ -2079,7 +2080,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, return 1; } -/** +/* * vgic_its_save_ite - Save an interrupt translation entry at @gpa */ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, @@ -2099,6 +2100,8 @@ static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, /** * vgic_its_restore_ite - restore an interrupt translation entry + * + * @its: its handle * @event_id: id used for indexing * @ptr: pointer to the ITE entry * @opaque: pointer to the its_device @@ -2231,6 +2234,7 @@ static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) * @its: ITS handle * @dev: ITS device * @ptr: GPA + * @dte_esz: device table entry size */ static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, gpa_t ptr, int dte_esz) @@ -2313,7 +2317,7 @@ static int vgic_its_device_cmp(void *priv, const struct list_head *a, return 1; } -/** +/* * vgic_its_save_device_tables - Save the device table and all ITT * into guest RAM * @@ -2386,7 +2390,7 @@ static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, return ret; } -/** +/* * vgic_its_restore_device_tables - Restore the device table and all ITT * from guest RAM to internal data structs */ @@ -2478,7 +2482,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) return 1; } -/** +/* * vgic_its_save_collection_table - Save the collection table into * guest RAM */ @@ -2518,7 +2522,7 @@ static int vgic_its_save_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_restore_collection_table - reads the collection table * in guest memory and restores the ITS internal state. Requires the * BASER registers to be restored before. @@ -2556,7 +2560,7 @@ static int vgic_its_restore_collection_table(struct vgic_its *its) return ret; } -/** +/* * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM * according to v0 ABI */ @@ -2571,7 +2575,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its) return vgic_its_save_collection_table(its); } -/** +/* * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM * to internal data structs according to V0 ABI * diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index ed6e412cd74bacbdb91588c6e637ef6f716353df..3eecdd2f4b8f552665096a2c3d1528c66f468c77 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -370,7 +370,7 @@ static void map_all_vpes(struct kvm *kvm) dist->its_vm.vpes[i]->irq)); } -/** +/* * vgic_v3_save_pending_tables - Save the pending tables into guest RAM * kvm lock and all vcpu lock must be held */ diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c index f07b3ddff7d442be815ad15f3462d5539c84e76b..abe29c7d85d05a0222b2854c82d86d822480d9bb 100644 --- a/arch/arm64/kvm/vgic/vgic.c +++ b/arch/arm64/kvm/vgic/vgic.c @@ -36,6 +36,11 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * we have to disable IRQs before taking this lock and everything lower * than it. * + * The config_lock has additional ordering requirements: + * kvm->slots_lock + * kvm->srcu + * kvm->arch.config_lock + * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. * If you are already holding a lock and need to take a higher one, you @@ -313,7 +318,7 @@ static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owne * with all locks dropped. */ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags) + unsigned long flags) __releases(&irq->irq_lock) { struct kvm_vcpu *vcpu; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 03d356a123771fda53c9aff25e475cbb68079bc6..8532bfe3fed40c6fcfb05515bf0709e59278d07b 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -186,7 +186,7 @@ bool vgic_get_phys_line_level(struct vgic_irq *irq); void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, - unsigned long flags); + unsigned long flags) __releases(&irq->irq_lock); void vgic_kick_vcpus(struct kvm *kvm); void vgic_irq_handle_resampling(struct vgic_irq *irq, bool lr_deactivated, bool lr_pending); @@ -346,4 +346,11 @@ void vgic_v4_configure_vsgis(struct kvm *kvm); void vgic_v4_get_vlpi_state(struct vgic_irq *irq, bool *val); int vgic_v4_request_vpe_irq(struct kvm_vcpu *vcpu, int irq); +static inline bool kvm_has_gicv3(struct kvm *kvm) +{ + return (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) && + irqchip_in_kernel(kvm) && + kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3); +} + #endif diff --git a/arch/loongarch/include/asm/hugetlb.h b/arch/loongarch/include/asm/hugetlb.h index aa44b3fe43dde781fa0f9ef9517edcdc0f0bff89..5da32c00d483fbab845664373b950d45e98982bd 100644 --- a/arch/loongarch/include/asm/hugetlb.h +++ b/arch/loongarch/include/asm/hugetlb.h @@ -34,7 +34,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pte_t clear; - pte_t pte = *ptep; + pte_t pte = ptep_get(ptep); pte_val(clear) = (unsigned long)invalid_pte_table; set_pte_at(mm, addr, ptep, clear); @@ -65,7 +65,7 @@ static inline int huge_ptep_set_access_flags(struct vm_area_struct *vma, pte_t *ptep, pte_t pte, int dirty) { - int changed = !pte_same(*ptep, pte); + int changed = !pte_same(ptep_get(ptep), pte); if (changed) { set_pte_at(vma->vm_mm, addr, ptep, pte); diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 92636e82957c7ed58201073c1477c68568421602..da9e93024626c5c96ee92db453a0910472a6c7c4 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -53,13 +53,13 @@ static inline bool kfence_protect_page(unsigned long addr, bool protect) { pte_t *pte = virt_to_kpte(addr); - if (WARN_ON(!pte) || pte_none(*pte)) + if (WARN_ON(!pte) || pte_none(ptep_get(pte))) return false; if (protect) - set_pte(pte, __pte(pte_val(*pte) & ~(_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) & ~(_PAGE_VALID | _PAGE_PRESENT))); else - set_pte(pte, __pte(pte_val(*pte) | (_PAGE_VALID | _PAGE_PRESENT))); + set_pte(pte, __pte(pte_val(ptep_get(pte)) | (_PAGE_VALID | _PAGE_PRESENT))); preempt_disable(); local_flush_tlb_one(addr); diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 44b54965f5b4f888b41d8eab7049f5b55ead0e37..5f0677e03817baacc343c8bb35f3eb6b410d398b 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -26,8 +26,6 @@ #define KVM_MAX_VCPUS 256 #define KVM_MAX_CPUCFG_REGS 21 -/* memory slots that does not exposed to userspace */ -#define KVM_PRIVATE_MEM_SLOTS 0 #define KVM_HALT_POLL_NS_DEFAULT 500000 #define KVM_REQ_TLB_FLUSH_GPA KVM_ARCH_REQ(0) diff --git a/arch/loongarch/include/asm/kvm_para.h b/arch/loongarch/include/asm/kvm_para.h index 335fb86778e2bc4d7a82c7e57670277a90efc481..43ec61589e6cde88f7120b5052d32e4435c0c13d 100644 --- a/arch/loongarch/include/asm/kvm_para.h +++ b/arch/loongarch/include/asm/kvm_para.h @@ -39,9 +39,9 @@ struct kvm_steal_time { * Hypercall interface for KVM hypervisor * * a0: function identifier - * a1-a6: args + * a1-a5: args * Return value will be placed in a0. - * Up to 6 arguments are passed in a1, a2, a3, a4, a5, a6. + * Up to 5 arguments are passed in a1, a2, a3, a4, a5. */ static __always_inline long kvm_hypercall0(u64 fid) { diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h index 3fbf1f37c58ecf6ee447adf4aeccd57d7a2bd538..85431f20a14db77f5ba77b78d6ccc7a199bb6eaa 100644 --- a/arch/loongarch/include/asm/pgtable.h +++ b/arch/loongarch/include/asm/pgtable.h @@ -106,6 +106,9 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; #define KFENCE_AREA_START (VMEMMAP_END + 1) #define KFENCE_AREA_END (KFENCE_AREA_START + KFENCE_AREA_SIZE - 1) +#define ptep_get(ptep) READ_ONCE(*(ptep)) +#define pmdp_get(pmdp) READ_ONCE(*(pmdp)) + #define pte_ERROR(e) \ pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) #ifndef __PAGETABLE_PMD_FOLDED @@ -147,11 +150,6 @@ static inline int p4d_present(p4d_t p4d) return p4d_val(p4d) != (unsigned long)invalid_pud_table; } -static inline void p4d_clear(p4d_t *p4dp) -{ - p4d_val(*p4dp) = (unsigned long)invalid_pud_table; -} - static inline pud_t *p4d_pgtable(p4d_t p4d) { return (pud_t *)p4d_val(p4d); @@ -159,7 +157,12 @@ static inline pud_t *p4d_pgtable(p4d_t p4d) static inline void set_p4d(p4d_t *p4d, p4d_t p4dval) { - *p4d = p4dval; + WRITE_ONCE(*p4d, p4dval); +} + +static inline void p4d_clear(p4d_t *p4dp) +{ + set_p4d(p4dp, __p4d((unsigned long)invalid_pud_table)); } #define p4d_phys(p4d) PHYSADDR(p4d_val(p4d)) @@ -193,17 +196,20 @@ static inline int pud_present(pud_t pud) return pud_val(pud) != (unsigned long)invalid_pmd_table; } -static inline void pud_clear(pud_t *pudp) +static inline pmd_t *pud_pgtable(pud_t pud) { - pud_val(*pudp) = ((unsigned long)invalid_pmd_table); + return (pmd_t *)pud_val(pud); } -static inline pmd_t *pud_pgtable(pud_t pud) +static inline void set_pud(pud_t *pud, pud_t pudval) { - return (pmd_t *)pud_val(pud); + WRITE_ONCE(*pud, pudval); } -#define set_pud(pudptr, pudval) do { *(pudptr) = (pudval); } while (0) +static inline void pud_clear(pud_t *pudp) +{ + set_pud(pudp, __pud((unsigned long)invalid_pmd_table)); +} #define pud_phys(pud) PHYSADDR(pud_val(pud)) #define pud_page(pud) (pfn_to_page(pud_phys(pud) >> PAGE_SHIFT)) @@ -231,12 +237,15 @@ static inline int pmd_present(pmd_t pmd) return pmd_val(pmd) != (unsigned long)invalid_pte_table; } -static inline void pmd_clear(pmd_t *pmdp) +static inline void set_pmd(pmd_t *pmd, pmd_t pmdval) { - pmd_val(*pmdp) = ((unsigned long)invalid_pte_table); + WRITE_ONCE(*pmd, pmdval); } -#define set_pmd(pmdptr, pmdval) do { *(pmdptr) = (pmdval); } while (0) +static inline void pmd_clear(pmd_t *pmdp) +{ + set_pmd(pmdp, __pmd((unsigned long)invalid_pte_table)); +} #define pmd_phys(pmd) PHYSADDR(pmd_val(pmd)) @@ -314,7 +323,8 @@ extern void paging_init(void); static inline void set_pte(pte_t *ptep, pte_t pteval) { - *ptep = pteval; + WRITE_ONCE(*ptep, pteval); + if (pte_val(pteval) & _PAGE_GLOBAL) { pte_t *buddy = ptep_buddy(ptep); /* @@ -341,8 +351,8 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) : [buddy] "+m" (buddy->pte), [tmp] "=&r" (tmp) : [global] "r" (page_global)); #else /* !CONFIG_SMP */ - if (pte_none(*buddy)) - pte_val(*buddy) = pte_val(*buddy) | _PAGE_GLOBAL; + if (pte_none(ptep_get(buddy))) + WRITE_ONCE(*buddy, __pte(pte_val(ptep_get(buddy)) | _PAGE_GLOBAL)); #endif /* CONFIG_SMP */ } } @@ -350,7 +360,7 @@ static inline void set_pte(pte_t *ptep, pte_t pteval) static inline void pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { /* Preserve global status for the pair */ - if (pte_val(*ptep_buddy(ptep)) & _PAGE_GLOBAL) + if (pte_val(ptep_get(ptep_buddy(ptep))) & _PAGE_GLOBAL) set_pte(ptep, __pte(_PAGE_GLOBAL)); else set_pte(ptep, __pte(0)); @@ -603,7 +613,7 @@ static inline pmd_t pmd_mkinvalid(pmd_t pmd) static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t old = *pmdp; + pmd_t old = pmdp_get(pmdp); pmd_clear(pmdp); diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls index 523bb411a3bca559cb5b0e988dd08f662dac89d3..ab7d9baa29152da97932c7e447a183fba265451c 100644 --- a/arch/loongarch/kernel/Makefile.syscalls +++ b/arch/loongarch/kernel/Makefile.syscalls @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -syscall_abis_64 += newstat +# No special ABIs on loongarch so far +syscall_abis_64 += diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c index 000825406c1f62cdebd32e79714738987d20d5cc..2bf86aeda874c79f58ad6d6ba79477a4eda1811b 100644 --- a/arch/loongarch/kernel/efi.c +++ b/arch/loongarch/kernel/efi.c @@ -66,6 +66,12 @@ void __init efi_runtime_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); } +bool efi_poweroff_required(void) +{ + return efi_enabled(EFI_RUNTIME_SERVICES) && + (acpi_gbl_reduced_hardware || acpi_no_s5); +} + unsigned long __initdata screen_info_table = EFI_INVALID_TABLE_ADDR; #if defined(CONFIG_SYSFB) || defined(CONFIG_EFI_EARLYCON) diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 2634a9e8d82cc54889465fe4042a50699ee6c186..28681dfb4b85975b04d2d81960312d929c1e3c82 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -714,19 +714,19 @@ static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, * value) and then p*d_offset() walks into the target huge page instead * of the old page table (sees the new value). */ - pgd = READ_ONCE(*pgd_offset(kvm->mm, hva)); + pgd = pgdp_get(pgd_offset(kvm->mm, hva)); if (pgd_none(pgd)) goto out; - p4d = READ_ONCE(*p4d_offset(&pgd, hva)); + p4d = p4dp_get(p4d_offset(&pgd, hva)); if (p4d_none(p4d) || !p4d_present(p4d)) goto out; - pud = READ_ONCE(*pud_offset(&p4d, hva)); + pud = pudp_get(pud_offset(&p4d, hva)); if (pud_none(pud) || !pud_present(pud)) goto out; - pmd = READ_ONCE(*pmd_offset(&pud, hva)); + pmd = pmdp_get(pmd_offset(&pud, hva)); if (pmd_none(pmd) || !pmd_present(pmd)) goto out; diff --git a/arch/loongarch/mm/hugetlbpage.c b/arch/loongarch/mm/hugetlbpage.c index 12222c56cb594829f6b4e1601feb36a19e99e584..e4068906143b33a9d850976692141252de43de17 100644 --- a/arch/loongarch/mm/hugetlbpage.c +++ b/arch/loongarch/mm/hugetlbpage.c @@ -39,11 +39,11 @@ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, pmd_t *pmd = NULL; pgd = pgd_offset(mm, addr); - if (pgd_present(*pgd)) { + if (pgd_present(pgdp_get(pgd))) { p4d = p4d_offset(pgd, addr); - if (p4d_present(*p4d)) { + if (p4d_present(p4dp_get(p4d))) { pud = pud_offset(p4d, addr); - if (pud_present(*pud)) + if (pud_present(pudp_get(pud))) pmd = pmd_offset(pud, addr); } } diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c index bf789d114c2d79a284b5d6e5acb84659c2c5d375..8a87a482c8f44bf085ae792da33342bb35b9b010 100644 --- a/arch/loongarch/mm/init.c +++ b/arch/loongarch/mm/init.c @@ -141,7 +141,7 @@ void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node, int __meminit vmemmap_check_pmd(pmd_t *pmd, int node, unsigned long addr, unsigned long next) { - int huge = pmd_val(*pmd) & _PAGE_HUGE; + int huge = pmd_val(pmdp_get(pmd)) & _PAGE_HUGE; if (huge) vmemmap_verify((pte_t *)pmd, node, addr, next); @@ -173,7 +173,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pud) panic("%s: Failed to allocate memory\n", __func__); @@ -184,7 +184,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE); if (!pmd) panic("%s: Failed to allocate memory\n", __func__); @@ -195,7 +195,7 @@ pte_t * __init populate_kernel_pte(unsigned long addr) } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { + if (!pmd_present(pmdp_get(pmd))) { pte_t *pte; pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); @@ -216,7 +216,7 @@ void __init __set_fixmap(enum fixed_addresses idx, BUG_ON(idx <= FIX_HOLE || idx >= __end_of_fixed_addresses); ptep = populate_kernel_pte(addr); - if (!pte_none(*ptep)) { + if (!pte_none(ptep_get(ptep))) { pte_ERROR(*ptep); return; } diff --git a/arch/loongarch/mm/kasan_init.c b/arch/loongarch/mm/kasan_init.c index c608adc9984581d0419594a8eb87ae18a3e9ec63..427d6b1aec09e707837831888f55275165127c9b 100644 --- a/arch/loongarch/mm/kasan_init.c +++ b/arch/loongarch/mm/kasan_init.c @@ -105,7 +105,7 @@ static phys_addr_t __init kasan_alloc_zeroed_page(int node) static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, bool early) { - if (__pmd_none(early, READ_ONCE(*pmdp))) { + if (__pmd_none(early, pmdp_get(pmdp))) { phys_addr_t pte_phys = early ? __pa_symbol(kasan_early_shadow_pte) : kasan_alloc_zeroed_page(node); if (!early) @@ -118,7 +118,7 @@ static pte_t *__init kasan_pte_offset(pmd_t *pmdp, unsigned long addr, int node, static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, bool early) { - if (__pud_none(early, READ_ONCE(*pudp))) { + if (__pud_none(early, pudp_get(pudp))) { phys_addr_t pmd_phys = early ? __pa_symbol(kasan_early_shadow_pmd) : kasan_alloc_zeroed_page(node); if (!early) @@ -131,7 +131,7 @@ static pmd_t *__init kasan_pmd_offset(pud_t *pudp, unsigned long addr, int node, static pud_t *__init kasan_pud_offset(p4d_t *p4dp, unsigned long addr, int node, bool early) { - if (__p4d_none(early, READ_ONCE(*p4dp))) { + if (__p4d_none(early, p4dp_get(p4dp))) { phys_addr_t pud_phys = early ? __pa_symbol(kasan_early_shadow_pud) : kasan_alloc_zeroed_page(node); if (!early) @@ -154,7 +154,7 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, : kasan_alloc_zeroed_page(node); next = addr + PAGE_SIZE; set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); - } while (ptep++, addr = next, addr != end && __pte_none(early, READ_ONCE(*ptep))); + } while (ptep++, addr = next, addr != end && __pte_none(early, ptep_get(ptep))); } static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, @@ -166,7 +166,7 @@ static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, do { next = pmd_addr_end(addr, end); kasan_pte_populate(pmdp, addr, next, node, early); - } while (pmdp++, addr = next, addr != end && __pmd_none(early, READ_ONCE(*pmdp))); + } while (pmdp++, addr = next, addr != end && __pmd_none(early, pmdp_get(pmdp))); } static void __init kasan_pud_populate(p4d_t *p4dp, unsigned long addr, diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index bda018150000e66b906420ea7e3a5f79472ca352..eb6a29b491a72be73f64a472d57fe3c2856bc9c1 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -128,7 +128,7 @@ pmd_t mk_pmd(struct page *page, pgprot_t prot) void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { - *pmdp = pmd; + WRITE_ONCE(*pmdp, pmd); flush_tlb_all(); } diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 368e8475870f0810be1a707266c357c06b51ffd1..5f6e9e2ebbdbb805b37a2f7deef744e7bc3bb123 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -303,13 +303,6 @@ int r4k_clockevent_init(void) if (!c0_compare_int_usable()) return -ENXIO; - /* - * With vectored interrupts things are getting platform specific. - * get_c0_compare_int is a hook to allow a platform to return the - * interrupt number of its liking. - */ - irq = get_c0_compare_int(); - cd = &per_cpu(mips_clockevent_device, cpu); cd->name = "MIPS"; @@ -320,7 +313,6 @@ int r4k_clockevent_init(void) min_delta = calculate_min_delta(); cd->rating = 300; - cd->irq = irq; cd->cpumask = cpumask_of(cpu); cd->set_next_event = mips_next_event; cd->event_handler = mips_event_handler; @@ -332,6 +324,13 @@ int r4k_clockevent_init(void) cp0_timer_irq_installed = 1; + /* + * With vectored interrupts things are getting platform specific. + * get_c0_compare_int is a hook to allow a platform to return the + * interrupt number of its liking. + */ + irq = get_c0_compare_int(); + if (request_irq(irq, c0_compare_interrupt, flags, "timer", c0_compare_interrupt)) pr_err("Failed to request irq %d (timer)\n", irq); diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index bda7f193baab9f6709e43212fbc2960a0eae1cc0..af7412549e6ea4484543dfe5846d58b632778ff2 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -1724,12 +1724,16 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); c->ases &= ~MIPS_ASE_VZ; /* VZ of Loongson-3A2000/3000 is incomplete */ + change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER, + LOONGSON_CONF6_INTIMER); break; case PRID_IMP_LOONGSON_64G: __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); decode_cpucfg(c); + change_c0_config6(LOONGSON_CONF6_EXTIMER | LOONGSON_CONF6_INTIMER, + LOONGSON_CONF6_INTIMER); break; default: panic("Unknown Loongson Processor ID!"); diff --git a/arch/mips/sgi-ip22/ip22-gio.c b/arch/mips/sgi-ip22/ip22-gio.c index 2738325e98dd0900539d65ba34956ce434bfc139..d20eec742bfaab6fdc0238f1926e98189a25fa5b 100644 --- a/arch/mips/sgi-ip22/ip22-gio.c +++ b/arch/mips/sgi-ip22/ip22-gio.c @@ -111,7 +111,7 @@ void gio_device_unregister(struct gio_device *giodev) } EXPORT_SYMBOL_GPL(gio_device_unregister); -static int gio_bus_match(struct device *dev, struct device_driver *drv) +static int gio_bus_match(struct device *dev, const struct device_driver *drv) { struct gio_device *gio_dev = to_gio_device(dev); struct gio_driver *gio_drv = to_gio_driver(drv); diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 5d650e02cbf4a33cc6bf5b4a1b1d9e106f399d40..b0a2ac3ba91610e4240c04f09f4e52a2600c6636 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -20,6 +20,7 @@ config PARISC select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK + select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_DEBUG_VM_PGTABLE select HAVE_RELIABLE_STACKTRACE select DMA_OPS diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index 2a60d7a72f1fa80b96b058eced6c33895693b9b2..a3f0f100f21949f453c18119e61aaead71e01435 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -20,7 +20,16 @@ #define SMP_CACHE_BYTES L1_CACHE_BYTES -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#ifdef CONFIG_PA20 +#define ARCH_DMA_MINALIGN 128 +#else +#define ARCH_DMA_MINALIGN 32 +#endif +#define ARCH_KMALLOC_MINALIGN 16 /* ldcw requires 16-byte alignment */ + +#define arch_slab_minalign() ((unsigned)dcache_stride) +#define cache_line_size() dcache_stride +#define dma_get_cache_alignment cache_line_size #define __read_mostly __section(".data..read_mostly") diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c index 979f45d4d1fbeae5a60d92345409d413d352e7e1..06cbcd6fe87b80e0eeb35c8b134fe5b424ef3825 100644 --- a/arch/parisc/net/bpf_jit_core.c +++ b/arch/parisc/net/bpf_jit_core.c @@ -114,7 +114,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) jit_data->header = bpf_jit_binary_alloc(prog_size + extable_size, &jit_data->image, - sizeof(u32), + sizeof(long), bpf_fill_ill_insns); if (!jit_data->header) { prog = orig_prog; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index f4e6f2dd04b731ec3eb5154986a8aecd573dd8d0..16bacfe8c7a2ca1f97d403d2889e7135effbd978 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -145,6 +145,7 @@ static inline int cpu_to_coregroup_id(int cpu) #ifdef CONFIG_HOTPLUG_SMT #include +#include #include static inline bool topology_is_primary_thread(unsigned int cpu) @@ -156,6 +157,18 @@ static inline bool topology_smt_thread_allowed(unsigned int cpu) { return cpu_thread_in_core(cpu) < cpu_smt_num_threads; } + +#define topology_is_core_online topology_is_core_online +static inline bool topology_is_core_online(unsigned int cpu) +{ + int i, first_cpu = cpu_first_thread_sibling(cpu); + + for (i = first_cpu; i < first_cpu + threads_per_core; ++i) { + if (cpu_online(i)) + return true; + } + return false; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 4bd2f87616baa93a11e4015e01e9c7e986139eeb..943430077375a4e3c95f4ee9c8d38270081d557e 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -959,6 +959,7 @@ void __init setup_arch(char **cmdline_p) mem_topology_setup(); /* Set max_mapnr before paging_init() */ set_max_mapnr(max_pfn); + high_memory = (void *)__va(max_low_pfn * PAGE_SIZE); /* * Release secondary cpus out of their spinloops at 0x60 now that diff --git a/arch/powerpc/mm/init-common.c b/arch/powerpc/mm/init-common.c index 9b4a675eb8f877ce313d125ca007f0ed3cde92a1..2978fcbe307eabf43984ce631e7faa1dbeaff9c0 100644 --- a/arch/powerpc/mm/init-common.c +++ b/arch/powerpc/mm/init-common.c @@ -73,7 +73,7 @@ void setup_kup(void) #define CTOR(shift) static void ctor_##shift(void *addr) \ { \ - memset(addr, 0, sizeof(void *) << (shift)); \ + memset(addr, 0, sizeof(pgd_t) << (shift)); \ } CTOR(0); CTOR(1); CTOR(2); CTOR(3); CTOR(4); CTOR(5); CTOR(6); CTOR(7); @@ -117,7 +117,7 @@ EXPORT_SYMBOL_GPL(pgtable_cache); /* used by kvm_hv module */ void pgtable_cache_add(unsigned int shift) { char *name; - unsigned long table_size = sizeof(void *) << shift; + unsigned long table_size = sizeof(pgd_t) << shift; unsigned long align = table_size; /* When batching pgtable pointers for RCU freeing, we store diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index d325217ab201214b2af406afe4fcb4701cb5f51a..da21cb018984eb84f39e22a280e918dcd955c8ac 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -290,8 +290,6 @@ void __init mem_init(void) swiotlb_init(ppc_swiotlb_enable, ppc_swiotlb_flags); #endif - high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); - kasan_late_init(); memblock_free_all(); diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h index ef01c182af2b0507d53f0b774f6229d4ae6caacd..ffb9484531af7e4166ef970cb7c79f04176a2823 100644 --- a/arch/riscv/include/asm/hwprobe.h +++ b/arch/riscv/include/asm/hwprobe.h @@ -8,7 +8,7 @@ #include -#define RISCV_HWPROBE_MAX_KEY 8 +#define RISCV_HWPROBE_MAX_KEY 9 static inline bool riscv_hwprobe_key_is_valid(__s64 key) { diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index b706c8e47b027b1daa698928d96858622ea8a616..1e153cda57db85b99d40f4fcc67aa69651a0f856 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -82,6 +82,12 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE 6 #define RISCV_HWPROBE_KEY_HIGHEST_VIRT_ADDRESS 7 #define RISCV_HWPROBE_KEY_TIME_CSR_FREQ 8 +#define RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF 9 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN 0 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED 1 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW 2 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_FAST 3 +#define RISCV_HWPROBE_MISALIGNED_SCALAR_UNSUPPORTED 4 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */ /* Flags */ diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls index 52087a023b3df313709502f4ee3e046578c4b3a8..9668fd1faf60e828ed2786c2ee84739ac1f153fc 100644 --- a/arch/riscv/kernel/Makefile.syscalls +++ b/arch/riscv/kernel/Makefile.syscalls @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += riscv memfd_secret -syscall_abis_64 += riscv newstat rlimit memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/acpi_numa.c b/arch/riscv/kernel/acpi_numa.c index 0231482d6946aa7648c444a617d44f73aa87cb0e..ff95aeebee3ebcd8e2b2eba42ec2e225932d6060 100644 --- a/arch/riscv/kernel/acpi_numa.c +++ b/arch/riscv/kernel/acpi_numa.c @@ -28,7 +28,7 @@ #include -static int acpi_early_node_map[NR_CPUS] __initdata = { NUMA_NO_NODE }; +static int acpi_early_node_map[NR_CPUS] __initdata = { [0 ... NR_CPUS - 1] = NUMA_NO_NODE }; int __init acpi_numa_get_nid(unsigned int cpu) { diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 8f20607adb406d2fd94ae2628de1872c75defd11..b427188b28fc16defce7c05ae178b8b9de346d8f 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -432,28 +432,26 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { ext = riscv_get_isa_ext_data(bit); - if (!ext) - continue; - if (ext->validate) { + if (ext && ext->validate) { ret = ext->validate(ext, resolved_isa); if (ret == -EPROBE_DEFER) { loop = true; continue; } else if (ret) { /* Disable the extension entirely */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); continue; } } - set_bit(ext->id, resolved_isa); + set_bit(bit, resolved_isa); /* No need to keep it in source isa now that it is enabled */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); /* Single letter extensions get set in hwcap */ - if (ext->id < RISCV_ISA_EXT_BASE) - *this_hwcap |= isa2hwcap[ext->id]; + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; } } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 69e5796fc51fde6940303514732e16ba72dd574d..34ef522f07a8c2179b5643b07c82f22284a1907b 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -205,6 +205,8 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) int ret; ret = patch_insn_set(addr, c, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -239,6 +241,8 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) int ret; ret = patch_insn_write(addr, insns, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index 1026e22955ccc3a036e33c10bc1dd1324fce3667..0cc5559c08d8ff2c2e8a74bb9bbecfe8f383c104 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -71,7 +71,7 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); diff --git a/arch/riscv/kernel/sys_hwprobe.c b/arch/riscv/kernel/sys_hwprobe.c index 8d1b5c35d2a7315b15d3355f29eb8c8a4e24e250..cea0ca2bf2a25ecc671e31b141e84c6d1977da25 100644 --- a/arch/riscv/kernel/sys_hwprobe.c +++ b/arch/riscv/kernel/sys_hwprobe.c @@ -178,13 +178,13 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) perf = this_perf; if (perf != this_perf) { - perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + perf = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; break; } } if (perf == -1ULL) - return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + return RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; return perf; } @@ -192,12 +192,12 @@ static u64 hwprobe_misaligned(const struct cpumask *cpus) static u64 hwprobe_misaligned(const struct cpumask *cpus) { if (IS_ENABLED(CONFIG_RISCV_EFFICIENT_UNALIGNED_ACCESS)) - return RISCV_HWPROBE_MISALIGNED_FAST; + return RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; if (IS_ENABLED(CONFIG_RISCV_EMULATED_UNALIGNED_ACCESS) && unaligned_ctl_available()) - return RISCV_HWPROBE_MISALIGNED_EMULATED; + return RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; - return RISCV_HWPROBE_MISALIGNED_SLOW; + return RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; } #endif @@ -225,6 +225,7 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair, break; case RISCV_HWPROBE_KEY_CPUPERF_0: + case RISCV_HWPROBE_KEY_MISALIGNED_SCALAR_PERF: pair->value = hwprobe_misaligned(cpus); break; diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05a16b1f0aee858f3abf7a28c647ad1146410da0..51ebfd23e0076447518081d137102a9a11ff2e45 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); @@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *regs) if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), * so the maximum stack offset is 1k bytes (10 bits). diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index b62d5a2f4541e754cc7ea322e38d42b4dab65b90..192cd5603e95f9b92211cb616b98df7c6bc602d9 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -338,7 +338,7 @@ int handle_misaligned_load(struct pt_regs *regs) perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr); #ifdef CONFIG_RISCV_PROBE_UNALIGNED_ACCESS - *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_EMULATED; + *this_cpu_ptr(&misaligned_access_speed) = RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED; #endif if (!unaligned_enabled) @@ -532,13 +532,13 @@ static bool check_unaligned_access_emulated(int cpu) unsigned long tmp_var, tmp_val; bool misaligned_emu_detected; - *mas_ptr = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + *mas_ptr = RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN; __asm__ __volatile__ ( " "REG_L" %[tmp], 1(%[ptr])\n" : [tmp] "=r" (tmp_val) : [ptr] "r" (&tmp_var) : "memory"); - misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_EMULATED); + misaligned_emu_detected = (*mas_ptr == RISCV_HWPROBE_MISALIGNED_SCALAR_EMULATED); /* * If unaligned_ctl is already set, this means that we detected that all * CPUS uses emulated misaligned access at boot time. If that changed diff --git a/arch/riscv/kernel/unaligned_access_speed.c b/arch/riscv/kernel/unaligned_access_speed.c index a9a6bcb02acf111cee35a274716a1c78cbdf5886..160628a2116de4a5e4135e4bba25fef35141b747 100644 --- a/arch/riscv/kernel/unaligned_access_speed.c +++ b/arch/riscv/kernel/unaligned_access_speed.c @@ -34,9 +34,9 @@ static int check_unaligned_access(void *param) struct page *page = param; void *dst; void *src; - long speed = RISCV_HWPROBE_MISALIGNED_SLOW; + long speed = RISCV_HWPROBE_MISALIGNED_SCALAR_SLOW; - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) return 0; /* Make an unaligned destination buffer. */ @@ -95,14 +95,14 @@ static int check_unaligned_access(void *param) } if (word_cycles < byte_cycles) - speed = RISCV_HWPROBE_MISALIGNED_FAST; + speed = RISCV_HWPROBE_MISALIGNED_SCALAR_FAST; ratio = div_u64((byte_cycles * 100), word_cycles); pr_info("cpu%d: Ratio of byte access time to unaligned word access is %d.%02d, unaligned accesses are %s\n", cpu, ratio / 100, ratio % 100, - (speed == RISCV_HWPROBE_MISALIGNED_FAST) ? "fast" : "slow"); + (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) ? "fast" : "slow"); per_cpu(misaligned_access_speed, cpu) = speed; @@ -110,7 +110,7 @@ static int check_unaligned_access(void *param) * Set the value of fast_misaligned_access of a CPU. These operations * are atomic to avoid race conditions. */ - if (speed == RISCV_HWPROBE_MISALIGNED_FAST) + if (speed == RISCV_HWPROBE_MISALIGNED_SCALAR_FAST) cpumask_set_cpu(cpu, &fast_misaligned_access); else cpumask_clear_cpu(cpu, &fast_misaligned_access); @@ -188,7 +188,7 @@ static int riscv_online_cpu(unsigned int cpu) static struct page *buf; /* We are already set since the last check */ - if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_UNKNOWN) + if (per_cpu(misaligned_access_speed, cpu) != RISCV_HWPROBE_MISALIGNED_SCALAR_UNKNOWN) goto exit; buf = alloc_pages(GFP_KERNEL, MISALIGNED_BUFFER_ORDER); diff --git a/arch/riscv/kernel/vendor_extensions.c b/arch/riscv/kernel/vendor_extensions.c index b6c1e7b5d34b36aa024f7f9328c8b76f012b844f..a8126d1183412fced15572d7984c9e8d1b061999 100644 --- a/arch/riscv/kernel/vendor_extensions.c +++ b/arch/riscv/kernel/vendor_extensions.c @@ -38,7 +38,7 @@ bool __riscv_isa_vendor_extension_available(int cpu, unsigned long vendor, unsig #ifdef CONFIG_RISCV_ISA_VENDOR_EXT_ANDES case ANDES_VENDOR_ID: bmap = &riscv_isa_vendor_ext_list_andes.all_harts_isa_bitmap; - cpu_bmap = &riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap[cpu]; + cpu_bmap = riscv_isa_vendor_ext_list_andes.per_hart_isa_bitmap; break; #endif default: diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5224f373380225a2dfdecd40bfc01ce8f5d13bc0..a9f2b4af8f3f18442117f79855cfe89b39ce4d7e 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -61,26 +61,27 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } pagefault_out_of_memory(); return; } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); + return; } + BUG(); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index bfa2dea95354ff85a42f7062903b5757e5982db7..eb0649a61b4c1f3249967f893cf32cfdbadfb2d5 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -234,8 +234,6 @@ static void __init setup_bootmem(void) */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); - /* * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. @@ -250,6 +248,16 @@ static void __init setup_bootmem(void) if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; + /* + * The size of the linear page mapping may restrict the amount of + * usable RAM. + */ + if (IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + } + /* * Reserve physical address space that would be mapped to virtual * addresses greater than (void *)(-PAGE_SIZE) because: @@ -266,6 +274,7 @@ static void __init setup_bootmem(void) memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr); } + phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); @@ -918,7 +927,7 @@ static void __init create_kernel_page_table(pgd_t *pgdir, PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ - end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; + end_va = kernel_map.virt_addr + kernel_map.size; for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), @@ -1087,7 +1096,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; - kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; #else @@ -1284,8 +1293,6 @@ static void __init create_linear_mapping_page_table(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); - if (end >= __pa(PAGE_OFFSET) + memory_limit) - end = __pa(PAGE_OFFSET) + memory_limit; create_linear_mapping_range(start, end, 0, NULL); } diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 5bcf3af903daa2f9fb2aaf1e57d79121bfcda988..0e6ca6d5ae4b418f1c4d99906a5ab57f10485a00 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,6 +7,7 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ +#include #include .text @@ -34,6 +35,7 @@ SYM_CODE_END(purgatory_start) .data +.align LGREG SYM_DATA(riscv_kernel_entry, .quad 0) .end diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a822f952f64a9c6cafadd3e8b96f905edfcc5829..c60e699e99f5b3614e922dbf87c52c28042e98f6 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -604,6 +604,19 @@ config RANDOMIZE_BASE as a security feature that deters exploit attempts relying on knowledge of the location of kernel internals. +config RANDOMIZE_IDENTITY_BASE + bool "Randomize the address of the identity mapping base" + depends on RANDOMIZE_BASE + default DEBUG_VM + help + The identity mapping base address is pinned to zero by default. + Allow randomization of that base to expose otherwise missed + notion of physical and virtual addresses of data structures. + That does not have any impact on the base address at which the + kernel image is loaded. + + If unsure, say N + config KERNEL_IMAGE_BASE hex "Kernel image base address" range 0x100000 0x1FFFFFE0000000 if !KASAN diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index ce232552bc1c38684ad8036203041ec9874eb8af..c73b5118ad429a01fea516d2d34ac3eeac0be79a 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -162,7 +162,7 @@ static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr, loc = (long)*reloc + phys_offset; if (loc < min_addr || loc > max_addr) error("64-bit relocation outside of kernel!\n"); - *(u64 *)loc += offset - __START_KERNEL; + *(u64 *)loc += offset; } } @@ -177,7 +177,7 @@ static void kaslr_adjust_got(unsigned long offset) */ for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++) { if (*entry) - *entry += offset - __START_KERNEL; + *entry += offset; } } @@ -252,7 +252,7 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE)); + BUILD_BUG_ON(!IS_ALIGNED(TEXT_OFFSET, THREAD_SIZE)); BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE)); BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE); vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE); @@ -341,7 +341,8 @@ static unsigned long setup_kernel_memory_layout(unsigned long kernel_size) BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS)); max_mappable = max(ident_map_size, MAX_DCSS_ADDR); max_mappable = min(max_mappable, vmemmap_start); - __identity_base = round_down(vmemmap_start - max_mappable, rte_size); + if (IS_ENABLED(CONFIG_RANDOMIZE_IDENTITY_BASE)) + __identity_base = round_down(vmemmap_start - max_mappable, rte_size); return asce_limit; } @@ -388,31 +389,25 @@ static void kaslr_adjust_vmlinux_info(long offset) #endif } -static void fixup_vmlinux_info(void) -{ - vmlinux.entry -= __START_KERNEL; - kaslr_adjust_vmlinux_info(-__START_KERNEL); -} - void startup_kernel(void) { - unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size; - unsigned long nokaslr_offset_phys, kaslr_large_page_offset; - unsigned long amode31_lma = 0; + unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size; + unsigned long nokaslr_text_lma, text_lma = 0, amode31_lma = 0; + unsigned long kernel_size = TEXT_OFFSET + vmlinux_size; + unsigned long kaslr_large_page_offset; unsigned long max_physmem_end; unsigned long asce_limit; unsigned long safe_addr; psw_t psw; - fixup_vmlinux_info(); setup_lpp(); /* * Non-randomized kernel physical start address must be _SEGMENT_SIZE * aligned (see blow). */ - nokaslr_offset_phys = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); - safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size); + nokaslr_text_lma = ALIGN(mem_safe_offset(), _SEGMENT_SIZE); + safe_addr = PAGE_ALIGN(nokaslr_text_lma + vmlinux_size); /* * Reserve decompressor memory together with decompression heap, @@ -456,16 +451,27 @@ void startup_kernel(void) */ kaslr_large_page_offset = __kaslr_offset & ~_SEGMENT_MASK; if (kaslr_enabled()) { - unsigned long end = ident_map_size - kaslr_large_page_offset; + unsigned long size = vmlinux_size + kaslr_large_page_offset; - __kaslr_offset_phys = randomize_within_range(kernel_size, _SEGMENT_SIZE, 0, end); + text_lma = randomize_within_range(size, _SEGMENT_SIZE, TEXT_OFFSET, ident_map_size); } - if (!__kaslr_offset_phys) - __kaslr_offset_phys = nokaslr_offset_phys; - __kaslr_offset_phys |= kaslr_large_page_offset; + if (!text_lma) + text_lma = nokaslr_text_lma; + text_lma |= kaslr_large_page_offset; + + /* + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region is + * never accessed via the kernel image mapping as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, this region could be used for something else and does + * not need to be reserved. See how it is skipped in setup_vmem(). + */ + __kaslr_offset_phys = text_lma - TEXT_OFFSET; kaslr_adjust_vmlinux_info(__kaslr_offset_phys); - physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size); - deploy_kernel((void *)__kaslr_offset_phys); + physmem_reserve(RR_VMLINUX, text_lma, vmlinux_size); + deploy_kernel((void *)text_lma); /* vmlinux decompression is done, shrink reserved low memory */ physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end); @@ -488,7 +494,7 @@ void startup_kernel(void) amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, amode31_min, SZ_2G); } if (!amode31_lma) - amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size; + amode31_lma = text_lma - vmlinux.amode31_size; physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size); /* @@ -504,8 +510,8 @@ void startup_kernel(void) * - copy_bootdata() must follow setup_vmem() to propagate changes * to bootdata made by setup_vmem() */ - clear_bss_section(__kaslr_offset_phys); - kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size, + clear_bss_section(text_lma); + kaslr_adjust_relocs(text_lma, text_lma + vmlinux.image_size, __kaslr_offset, __kaslr_offset_phys); kaslr_adjust_got(__kaslr_offset); setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit); diff --git a/arch/s390/boot/vmem.c b/arch/s390/boot/vmem.c index 2847cc059ab7aca7a7d89a2e023d300a85b718ea..145035f84a0e3e898ff708f815b5ac54ec541860 100644 --- a/arch/s390/boot/vmem.c +++ b/arch/s390/boot/vmem.c @@ -90,7 +90,7 @@ static void kasan_populate_shadow(unsigned long kernel_start, unsigned long kern } memgap_start = end; } - kasan_populate(kernel_start, kernel_end, POPULATE_KASAN_MAP_SHADOW); + kasan_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KASAN_MAP_SHADOW); kasan_populate(0, (unsigned long)__identity_va(0), POPULATE_KASAN_ZERO_SHADOW); kasan_populate(AMODE31_START, AMODE31_END, POPULATE_KASAN_ZERO_SHADOW); if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { @@ -475,7 +475,17 @@ void setup_vmem(unsigned long kernel_start, unsigned long kernel_end, unsigned l (unsigned long)__identity_va(end), POPULATE_IDENTITY); } - pgtable_populate(kernel_start, kernel_end, POPULATE_KERNEL); + + /* + * [kernel_start..kernel_start + TEXT_OFFSET] region is never + * accessed as per the linker script: + * + * . = TEXT_OFFSET; + * + * Therefore, skip mapping TEXT_OFFSET bytes to prevent access to + * [__kaslr_offset_phys..__kaslr_offset_phys + TEXT_OFFSET] region. + */ + pgtable_populate(kernel_start + TEXT_OFFSET, kernel_end, POPULATE_KERNEL); pgtable_populate(AMODE31_START, AMODE31_END, POPULATE_DIRECT); pgtable_populate(__abs_lowcore, __abs_lowcore + sizeof(struct lowcore), POPULATE_ABS_LOWCORE); diff --git a/arch/s390/boot/vmlinux.lds.S b/arch/s390/boot/vmlinux.lds.S index a750711d44c863054f1e2c26afa41a7b76553a28..66670212a36118ba79d41341cc4edd7a8cb4c2d4 100644 --- a/arch/s390/boot/vmlinux.lds.S +++ b/arch/s390/boot/vmlinux.lds.S @@ -109,7 +109,12 @@ SECTIONS #ifdef CONFIG_KERNEL_UNCOMPRESSED . = ALIGN(PAGE_SIZE); . += AMODE31_SIZE; /* .amode31 section */ - . = ALIGN(1 << 20); /* _SEGMENT_SIZE */ + + /* + * Make sure the location counter is not less than TEXT_OFFSET. + * _SEGMENT_SIZE is not available, use ALIGN(1 << 20) instead. + */ + . = MAX(TEXT_OFFSET, ALIGN(1 << 20)); #else . = ALIGN(8); #endif diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 06416b3f94f595d26ca6687ebf4cfc1e6a2a7758..16e4caa931f1f39bc91b5c762699295bc3763ba3 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -279,8 +279,9 @@ static inline unsigned long virt_to_pfn(const void *kaddr) #define AMODE31_SIZE (3 * PAGE_SIZE) #define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) -#define __START_KERNEL 0x100000 #define __NO_KASLR_START_KERNEL CONFIG_KERNEL_IMAGE_BASE #define __NO_KASLR_END_KERNEL (__NO_KASLR_START_KERNEL + KERNEL_IMAGE_SIZE) +#define TEXT_OFFSET 0x100000 + #endif /* _S390_PAGE_H */ diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index 0b5f8f3e84f126dc9225e0f81c658062139894cb..153d93468b77c0331605bbe418c99d920e049372 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -441,7 +441,10 @@ static inline int share(unsigned long addr, u16 cmd) if (!uv_call(0, (u64)&uvcb)) return 0; - return -EINVAL; + pr_err("%s UVC failed (rc: 0x%x, rrc: 0x%x), possible hypervisor bug.\n", + uvcb.header.cmd == UVC_CMD_SET_SHARED_ACCESS ? "Share" : "Unshare", + uvcb.header.rc, uvcb.header.rrc); + panic("System security cannot be guaranteed unless the system panics now.\n"); } /* diff --git a/arch/s390/kernel/alternative.h b/arch/s390/kernel/alternative.h deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c index fa90bbdc5ef9467c70dcba01f11e2d2ed273e115..6f2e87920288ac6d377279470a1763805fb0b71a 100644 --- a/arch/s390/kernel/fpu.c +++ b/arch/s390/kernel/fpu.c @@ -113,7 +113,7 @@ void load_fpu_state(struct fpu *state, int flags) int mask; if (flags & KERNEL_FPC) - fpu_lfpc(&state->fpc); + fpu_lfpc_safe(&state->fpc); if (!cpu_has_vx()) { if (flags & KERNEL_VXR_V0V7) load_fp_regs_vx(state->vxrs); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 4ec99f73fa27e19b2cc612a92ac3507206bc23ac..a3fea683b22706d742813b1a72fd088978878874 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -734,7 +734,23 @@ static void __init memblock_add_physmem_info(void) } /* - * Reserve memory used for lowcore/command line/kernel image. + * Reserve memory used for lowcore. + */ +static void __init reserve_lowcore(void) +{ + void *lowcore_start = get_lowcore(); + void *lowcore_end = lowcore_start + sizeof(struct lowcore); + void *start, *end; + + if ((void *)__identity_base < lowcore_end) { + start = max(lowcore_start, (void *)__identity_base); + end = min(lowcore_end, (void *)(__identity_base + ident_map_size)); + memblock_reserve(__pa(start), __pa(end)); + } +} + +/* + * Reserve memory used for absolute lowcore/command line/kernel image. */ static void __init reserve_kernel(void) { @@ -918,6 +934,7 @@ void __init setup_arch(char **cmdline_p) /* Do some memory reservations *before* memory is added to memblock */ reserve_pgtables(); + reserve_lowcore(); reserve_kernel(); reserve_initrd(); reserve_certificate_list(); diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 975c654cf5a51f42f89862895240e58c013a008d..ae5d0a9d6911bc78582339496c60d9ec18fa029b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -39,7 +39,7 @@ PHDRS { SECTIONS { - . = __START_KERNEL; + . = TEXT_OFFSET; .text : { _stext = .; /* Start of text section */ _text = .; /* Text and read-only data */ @@ -59,14 +59,6 @@ SECTIONS } :text = 0x0700 RO_DATA(PAGE_SIZE) - .data.rel.ro : { - *(.data.rel.ro .data.rel.ro.*) - } - .got : { - __got_start = .; - *(.got) - __got_end = .; - } . = ALIGN(PAGE_SIZE); _sdata = .; /* Start of data section */ @@ -80,6 +72,15 @@ SECTIONS . = ALIGN(PAGE_SIZE); __end_ro_after_init = .; + .data.rel.ro : { + *(.data.rel.ro .data.rel.ro.*) + } + .got : { + __got_start = .; + *(.got) + __got_end = .; + } + RW_DATA(0x100, PAGE_SIZE, THREAD_SIZE) .data.rel : { *(.data.rel*) diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index bf8534218af3dcdf7be20f58a8e95c904cb2b9c0..e680c6bf0c9d94dea8dd67657472610ed9a06a37 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -267,7 +267,12 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); + u32 gd; + + if (!kvm->arch.gisa_int.origin) + return 0; + + gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index 98dab3e049de2aef9d486f288d741b47168fc829..0a67fcee44141e966ff514784752c10a6973167d 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -15,13 +16,15 @@ static unsigned long max_addr; struct addr_marker { + int is_start; unsigned long start_address; const char *name; }; enum address_markers_idx { - IDENTITY_BEFORE_NR = 0, - IDENTITY_BEFORE_END_NR, + KVA_NR = 0, + LOWCORE_START_NR, + LOWCORE_END_NR, AMODE31_START_NR, AMODE31_END_NR, KERNEL_START_NR, @@ -30,8 +33,8 @@ enum address_markers_idx { KFENCE_START_NR, KFENCE_END_NR, #endif - IDENTITY_AFTER_NR, - IDENTITY_AFTER_END_NR, + IDENTITY_START_NR, + IDENTITY_END_NR, VMEMMAP_NR, VMEMMAP_END_NR, VMALLOC_NR, @@ -59,43 +62,44 @@ enum address_markers_idx { }; static struct addr_marker address_markers[] = { - [IDENTITY_BEFORE_NR] = {0, "Identity Mapping Start"}, - [IDENTITY_BEFORE_END_NR] = {(unsigned long)_stext, "Identity Mapping End"}, - [AMODE31_START_NR] = {0, "Amode31 Area Start"}, - [AMODE31_END_NR] = {0, "Amode31 Area End"}, - [KERNEL_START_NR] = {(unsigned long)_stext, "Kernel Image Start"}, - [KERNEL_END_NR] = {(unsigned long)_end, "Kernel Image End"}, + [KVA_NR] = {0, 0, "Kernel Virtual Address Space"}, + [LOWCORE_START_NR] = {1, 0, "Lowcore Start"}, + [LOWCORE_END_NR] = {0, 0, "Lowcore End"}, + [IDENTITY_START_NR] = {1, 0, "Identity Mapping Start"}, + [IDENTITY_END_NR] = {0, 0, "Identity Mapping End"}, + [AMODE31_START_NR] = {1, 0, "Amode31 Area Start"}, + [AMODE31_END_NR] = {0, 0, "Amode31 Area End"}, + [KERNEL_START_NR] = {1, (unsigned long)_stext, "Kernel Image Start"}, + [KERNEL_END_NR] = {0, (unsigned long)_end, "Kernel Image End"}, #ifdef CONFIG_KFENCE - [KFENCE_START_NR] = {0, "KFence Pool Start"}, - [KFENCE_END_NR] = {0, "KFence Pool End"}, + [KFENCE_START_NR] = {1, 0, "KFence Pool Start"}, + [KFENCE_END_NR] = {0, 0, "KFence Pool End"}, #endif - [IDENTITY_AFTER_NR] = {(unsigned long)_end, "Identity Mapping Start"}, - [IDENTITY_AFTER_END_NR] = {0, "Identity Mapping End"}, - [VMEMMAP_NR] = {0, "vmemmap Area Start"}, - [VMEMMAP_END_NR] = {0, "vmemmap Area End"}, - [VMALLOC_NR] = {0, "vmalloc Area Start"}, - [VMALLOC_END_NR] = {0, "vmalloc Area End"}, + [VMEMMAP_NR] = {1, 0, "vmemmap Area Start"}, + [VMEMMAP_END_NR] = {0, 0, "vmemmap Area End"}, + [VMALLOC_NR] = {1, 0, "vmalloc Area Start"}, + [VMALLOC_END_NR] = {0, 0, "vmalloc Area End"}, #ifdef CONFIG_KMSAN - [KMSAN_VMALLOC_SHADOW_START_NR] = {0, "Kmsan vmalloc Shadow Start"}, - [KMSAN_VMALLOC_SHADOW_END_NR] = {0, "Kmsan vmalloc Shadow End"}, - [KMSAN_VMALLOC_ORIGIN_START_NR] = {0, "Kmsan vmalloc Origins Start"}, - [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, "Kmsan vmalloc Origins End"}, - [KMSAN_MODULES_SHADOW_START_NR] = {0, "Kmsan Modules Shadow Start"}, - [KMSAN_MODULES_SHADOW_END_NR] = {0, "Kmsan Modules Shadow End"}, - [KMSAN_MODULES_ORIGIN_START_NR] = {0, "Kmsan Modules Origins Start"}, - [KMSAN_MODULES_ORIGIN_END_NR] = {0, "Kmsan Modules Origins End"}, + [KMSAN_VMALLOC_SHADOW_START_NR] = {1, 0, "Kmsan vmalloc Shadow Start"}, + [KMSAN_VMALLOC_SHADOW_END_NR] = {0, 0, "Kmsan vmalloc Shadow End"}, + [KMSAN_VMALLOC_ORIGIN_START_NR] = {1, 0, "Kmsan vmalloc Origins Start"}, + [KMSAN_VMALLOC_ORIGIN_END_NR] = {0, 0, "Kmsan vmalloc Origins End"}, + [KMSAN_MODULES_SHADOW_START_NR] = {1, 0, "Kmsan Modules Shadow Start"}, + [KMSAN_MODULES_SHADOW_END_NR] = {0, 0, "Kmsan Modules Shadow End"}, + [KMSAN_MODULES_ORIGIN_START_NR] = {1, 0, "Kmsan Modules Origins Start"}, + [KMSAN_MODULES_ORIGIN_END_NR] = {0, 0, "Kmsan Modules Origins End"}, #endif - [MODULES_NR] = {0, "Modules Area Start"}, - [MODULES_END_NR] = {0, "Modules Area End"}, - [ABS_LOWCORE_NR] = {0, "Lowcore Area Start"}, - [ABS_LOWCORE_END_NR] = {0, "Lowcore Area End"}, - [MEMCPY_REAL_NR] = {0, "Real Memory Copy Area Start"}, - [MEMCPY_REAL_END_NR] = {0, "Real Memory Copy Area End"}, + [MODULES_NR] = {1, 0, "Modules Area Start"}, + [MODULES_END_NR] = {0, 0, "Modules Area End"}, + [ABS_LOWCORE_NR] = {1, 0, "Lowcore Area Start"}, + [ABS_LOWCORE_END_NR] = {0, 0, "Lowcore Area End"}, + [MEMCPY_REAL_NR] = {1, 0, "Real Memory Copy Area Start"}, + [MEMCPY_REAL_END_NR] = {0, 0, "Real Memory Copy Area End"}, #ifdef CONFIG_KASAN - [KASAN_SHADOW_START_NR] = {KASAN_SHADOW_START, "Kasan Shadow Start"}, - [KASAN_SHADOW_END_NR] = {KASAN_SHADOW_END, "Kasan Shadow End"}, + [KASAN_SHADOW_START_NR] = {1, KASAN_SHADOW_START, "Kasan Shadow Start"}, + [KASAN_SHADOW_END_NR] = {0, KASAN_SHADOW_END, "Kasan Shadow End"}, #endif - { -1, NULL } + {1, -1UL, NULL} }; struct pg_state { @@ -163,6 +167,19 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) st->wx_pages += (addr - st->start_address) / PAGE_SIZE; } +static void note_page_update_state(struct pg_state *st, unsigned long addr, unsigned int prot, int level) +{ + struct seq_file *m = st->seq; + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); + } + st->start_address = addr; + st->current_prot = prot; + st->level = level; +} + static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, u64 val) { int width = sizeof(unsigned long) * 2; @@ -186,9 +203,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, addr = max_addr; if (st->level == -1) { pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } else if (prot != st->current_prot || level != st->level || addr >= st->marker[1].start_address) { note_prot_wx(st, addr); @@ -202,13 +217,7 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, } pt_dump_seq_printf(m, "%9lu%c ", delta, *unit); print_prot(m, st->current_prot, st->level); - while (addr >= st->marker[1].start_address) { - st->marker++; - pt_dump_seq_printf(m, "---[ %s ]---\n", st->marker->name); - } - st->start_address = addr; - st->current_prot = prot; - st->level = level; + note_page_update_state(st, addr, prot, level); } } @@ -280,22 +289,25 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); #endif /* CONFIG_PTDUMP_DEBUGFS */ -/* - * Heapsort from lib/sort.c is not a stable sorting algorithm, do a simple - * insertion sort to preserve the original order of markers with the same - * start address. - */ -static void sort_address_markers(void) +static int ptdump_cmp(const void *a, const void *b) { - struct addr_marker tmp; - int i, j; + const struct addr_marker *ama = a; + const struct addr_marker *amb = b; - for (i = 1; i < ARRAY_SIZE(address_markers) - 1; i++) { - tmp = address_markers[i]; - for (j = i - 1; j >= 0 && address_markers[j].start_address > tmp.start_address; j--) - address_markers[j + 1] = address_markers[j]; - address_markers[j + 1] = tmp; - } + if (ama->start_address > amb->start_address) + return 1; + if (ama->start_address < amb->start_address) + return -1; + /* + * If the start addresses of two markers are identical consider the + * marker which defines the start of an area higher than the one which + * defines the end of an area. This keeps pairs of markers sorted. + */ + if (ama->is_start) + return 1; + if (amb->is_start) + return -1; + return 0; } static int pt_dump_init(void) @@ -303,6 +315,8 @@ static int pt_dump_init(void) #ifdef CONFIG_KFENCE unsigned long kfence_start = (unsigned long)__kfence_pool; #endif + unsigned long lowcore = (unsigned long)get_lowcore(); + /* * Figure out the maximum virtual address being accessible with the * kernel ASCE. We need this to keep the page table walker functions @@ -310,7 +324,10 @@ static int pt_dump_init(void) */ max_addr = (get_lowcore()->kernel_asce.val & _REGION_ENTRY_TYPE_MASK) >> 2; max_addr = 1UL << (max_addr * 11 + 31); - address_markers[IDENTITY_AFTER_END_NR].start_address = ident_map_size; + address_markers[LOWCORE_START_NR].start_address = lowcore; + address_markers[LOWCORE_END_NR].start_address = lowcore + sizeof(struct lowcore); + address_markers[IDENTITY_START_NR].start_address = __identity_base; + address_markers[IDENTITY_END_NR].start_address = __identity_base + ident_map_size; address_markers[AMODE31_START_NR].start_address = (unsigned long)__samode31; address_markers[AMODE31_END_NR].start_address = (unsigned long)__eamode31; address_markers[MODULES_NR].start_address = MODULES_VADDR; @@ -337,7 +354,8 @@ static int pt_dump_init(void) address_markers[KMSAN_MODULES_ORIGIN_START_NR].start_address = KMSAN_MODULES_ORIGIN_START; address_markers[KMSAN_MODULES_ORIGIN_END_NR].start_address = KMSAN_MODULES_ORIGIN_END; #endif - sort_address_markers(); + sort(address_markers, ARRAY_SIZE(address_markers) - 1, + sizeof(address_markers[0]), ptdump_cmp, NULL); #ifdef CONFIG_PTDUMP_DEBUGFS debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); #endif /* CONFIG_PTDUMP_DEBUGFS */ diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index ddcd39ef43463ade94bc3c94432a0afe6d6840d0..e3d258f9e72610a9d28bc77bdb580daddfcd7db6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -108,6 +108,8 @@ void mark_rodata_ro(void) { unsigned long size = __end_ro_after_init - __start_ro_after_init; + if (MACHINE_HAS_NX) + system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); __set_memory_ro(__start_ro_after_init, __end_ro_after_init); pr_info("Write protected read-only-after-init data: %luk\n", size >> 10); } @@ -170,13 +172,6 @@ void __init mem_init(void) setup_zero_pages(); /* Setup zeroed pages. */ } -void free_initmem(void) -{ - set_memory_rwnx((unsigned long)_sinittext, - (unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT); - free_initmem_default(POISON_FREE_INITMEM); -} - unsigned long memory_block_size_bytes(void) { /* diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 41c714e212927b12dad4aceba5c73b49169531e3..665b8228afebcdaa8b25cc3caf19d20b95a13fc5 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -661,7 +661,6 @@ void __init vmem_map_init(void) { __set_memory_rox(_stext, _etext); __set_memory_ro(_etext, __end_rodata); - __set_memory_rox(_sinittext, _einittext); __set_memory_rox(__stext_amode31, __etext_amode31); /* * If the BEAR-enhancement facility is not installed the first @@ -670,16 +669,8 @@ void __init vmem_map_init(void) */ if (!static_key_enabled(&cpu_has_bear)) set_memory_x(0, 1); - if (debug_pagealloc_enabled()) { - /* - * Use RELOC_HIDE() as long as __va(0) translates to NULL, - * since performing pointer arithmetic on a NULL pointer - * has undefined behavior and generates compiler warnings. - */ - __set_memory_4k(__va(0), RELOC_HIDE(__va(0), ident_map_size)); - } - if (MACHINE_HAS_NX) - system_ctl_set_bit(0, CR0_INSTRUCTION_EXEC_PROTECTION_BIT); + if (debug_pagealloc_enabled()) + __set_memory_4k(__va(0), __va(0) + ident_map_size); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); } diff --git a/arch/s390/tools/relocs.c b/arch/s390/tools/relocs.c index a74dbd5c9896a7b3fe30c104e2b662624dd0b26c..30a732c808f35e837145f692f77f07b122b40dd8 100644 --- a/arch/s390/tools/relocs.c +++ b/arch/s390/tools/relocs.c @@ -280,7 +280,7 @@ static int do_reloc(struct section *sec, Elf_Rel *rel) case R_390_GOTOFF64: break; case R_390_64: - add_reloc(&relocs64, offset - ehdr.e_entry); + add_reloc(&relocs64, offset); break; default: die("Unsupported relocation type: %d\n", r_type); diff --git a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c index e24298a734befd9023f534080bb23680b1f99c7d..a04cd13c6315a2af5f28ad83724d63004e322335 100644 --- a/arch/um/drivers/mconsole_user.c +++ b/arch/um/drivers/mconsole_user.c @@ -71,7 +71,9 @@ static struct mconsole_command *mconsole_parse(struct mc_request *req) return NULL; } +#ifndef MIN #define MIN(a,b) ((a)<(b) ? (a):(b)) +#endif #define STRINGX(x) #x #define STRING(x) STRINGX(x) diff --git a/arch/x86/coco/sev/core.c b/arch/x86/coco/sev/core.c index 082d61d85dfc1022de4266bfbeca880fe20c2e1e..de1df0cb45dab2fe8716ee9c4303f032bba5dd5c 100644 --- a/arch/x86/coco/sev/core.c +++ b/arch/x86/coco/sev/core.c @@ -163,7 +163,7 @@ struct sev_config { */ use_cas : 1, - __reserved : 62; + __reserved : 61; }; static struct sev_config sev_cfg __read_mostly; diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 83073fa3c9891ebfc8d769a8e7a1ee13706a2496..7093ee21c0d1c0410dbf17a77cb70a6bc6736f04 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -344,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal @@ -385,7 +386,6 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e6d133dda70fc596ec9966dcb21a2d984fa7acf5..65ab6460aed4d74e608eac8a82e4db31686a7423 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1522,20 +1522,23 @@ static void x86_pmu_start(struct perf_event *event, int flags) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + unsigned long *cntr_mask, *fixed_cntr_mask; + struct event_constraint *pebs_constraints; + struct cpu_hw_events *cpuc; u64 pebs, debugctl; - int cpu = smp_processor_id(); - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); - unsigned long *cntr_mask = hybrid(cpuc->pmu, cntr_mask); - unsigned long *fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); - struct event_constraint *pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); - unsigned long flags; - int idx; + int cpu, idx; + + guard(irqsave)(); + + cpu = smp_processor_id(); + cpuc = &per_cpu(cpu_hw_events, cpu); + cntr_mask = hybrid(cpuc->pmu, cntr_mask); + fixed_cntr_mask = hybrid(cpuc->pmu, fixed_cntr_mask); + pebs_constraints = hybrid(cpuc->pmu, pebs_constraints); if (!*(u64 *)cntr_mask) return; - local_irq_save(flags); - if (x86_pmu.version >= 2) { rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, ctrl); rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); @@ -1579,7 +1582,6 @@ void perf_event_print_debug(void) pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", cpu, idx, pmc_count); } - local_irq_restore(flags); } void x86_pmu_stop(struct perf_event *event, int flags) diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 0c9c2706d4ec8069615727ea71311beeb552425a..9e519d8a810a682f1decaa0e096fc17d68e737cd 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4589,6 +4589,25 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) return HYBRID_INTEL_CORE; } +static inline bool erratum_hsw11(struct perf_event *event) +{ + return (event->hw.config & INTEL_ARCH_EVENT_MASK) == + X86_CONFIG(.event=0xc0, .umask=0x01); +} + +/* + * The HSW11 requires a period larger than 100 which is the same as the BDM11. + * A minimum period of 128 is enforced as well for the INST_RETIRED.ALL. + * + * The message 'interrupt took too long' can be observed on any counter which + * was armed with a period < 32 and two events expired in the same NMI. + * A minimum period of 32 is enforced for the rest of the events. + */ +static void hsw_limit_period(struct perf_event *event, s64 *left) +{ + *left = max(*left, erratum_hsw11(event) ? 128 : 32); +} + /* * Broadwell: * @@ -4606,8 +4625,7 @@ static enum hybrid_cpu_type adl_get_hybrid_cpu_type(void) */ static void bdw_limit_period(struct perf_event *event, s64 *left) { - if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == - X86_CONFIG(.event=0xc0, .umask=0x01)) { + if (erratum_hsw11(event)) { if (*left < 128) *left = 128; *left &= ~0x3fULL; @@ -6766,6 +6784,7 @@ __init int intel_pmu_init(void) x86_pmu.hw_config = hsw_hw_config; x86_pmu.get_event_constraints = hsw_get_event_constraints; + x86_pmu.limit_period = hsw_limit_period; x86_pmu.lbr_double_abort = true; extra_attr = boot_cpu_has(X86_FEATURE_RTM) ? hsw_format_attr : nhm_format_attr; diff --git a/arch/x86/include/asm/cmdline.h b/arch/x86/include/asm/cmdline.h index 6faaf27e8899895854ef126a186d8b39120a9a34..6cbd9ae58b21b3a9b2b50769f2399ded2c2816d4 100644 --- a/arch/x86/include/asm/cmdline.h +++ b/arch/x86/include/asm/cmdline.h @@ -2,6 +2,10 @@ #ifndef _ASM_X86_CMDLINE_H #define _ASM_X86_CMDLINE_H +#include + +extern char builtin_cmdline[COMMAND_LINE_SIZE]; + int cmdline_find_option_bool(const char *cmdline_ptr, const char *option); int cmdline_find_option(const char *cmdline_ptr, const char *option, char *buffer, int bufsize); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 950a03e0181ec3b4daa2c050d0c18db4260134b4..4a68cb3eba78f81d4187cbdc3f5f6b529b001140 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1305,6 +1305,7 @@ struct kvm_arch { u8 vm_type; bool has_private_mem; bool has_protected_state; + bool pre_fault_allowed; struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; @@ -2191,6 +2192,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_forced_root_level, #define kvm_arch_has_private_mem(kvm) false #endif +#define kvm_arch_has_readonly_mem(kvm) (!(kvm)->arch.has_protected_state) + static inline u16 kvm_read_ldt(void) { u16 ldt; diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index a053c12939751f5341011bdd157091795b52bf24..68da67df304d5dc6629d1de4723f0abfc93b5d84 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -66,13 +66,15 @@ static inline bool vcpu_is_preempted(long cpu) #ifdef CONFIG_PARAVIRT /* - * virt_spin_lock_key - enables (by default) the virt_spin_lock() hijack. + * virt_spin_lock_key - disables by default the virt_spin_lock() hijack. * - * Native (and PV wanting native due to vCPU pinning) should disable this key. - * It is done in this backwards fashion to only have a single direction change, - * which removes ordering between native_pv_spin_init() and HV setup. + * Native (and PV wanting native due to vCPU pinning) should keep this key + * disabled. Native does not touch the key. + * + * When in a guest then native_pv_lock_init() enables the key first and + * KVM/XEN might conditionally disable it later in the boot process again. */ -DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); +DECLARE_STATIC_KEY_FALSE(virt_spin_lock_key); /* * Shortcut for the queued_spin_lock_slowpath() function that allows diff --git a/arch/x86/kernel/acpi/madt_wakeup.c b/arch/x86/kernel/acpi/madt_wakeup.c index 6cfe762be28bbc72c01cf88c13a7178369d6ada1..d5ef6215583bcf734627058bf9b46dd2b936508f 100644 --- a/arch/x86/kernel/acpi/madt_wakeup.c +++ b/arch/x86/kernel/acpi/madt_wakeup.c @@ -19,7 +19,7 @@ static u64 acpi_mp_wake_mailbox_paddr __ro_after_init; /* Virtual address of the Multiprocessor Wakeup Structure mailbox */ -static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init; +static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox; static u64 acpi_mp_pgd __ro_after_init; static u64 acpi_mp_reset_vector_paddr __ro_after_init; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index be5889bded498401c6ee6b24ccd9b4567ff90cfc..1e0fe5f8ab84e478294b62267176ae01fa540f31 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -462,7 +462,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) switch (c->x86_model) { case 0x00 ... 0x2f: case 0x40 ... 0x4f: - case 0x70 ... 0x7f: + case 0x60 ... 0x7f: setup_force_cpu_cap(X86_FEATURE_ZEN5); break; default: diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index b3fa61d45352e7120d7be7fb5cdb9d6f071c9729..0b69bfbf345d0ddf0e88c1ad6125d5f22c8883a4 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -306,7 +306,7 @@ static void freq_invariance_enable(void) WARN_ON_ONCE(1); return; } - static_branch_enable(&arch_scale_freq_key); + static_branch_enable_cpuslocked(&arch_scale_freq_key); register_freq_invariance_syscore_ops(); pr_info("Estimated ratio of average max frequency by base frequency (times 1024): %llu\n", arch_max_freq_ratio); } @@ -323,8 +323,10 @@ static void __init bp_init_freq_invariance(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) return; - if (intel_set_max_freq_ratio()) + if (intel_set_max_freq_ratio()) { + guard(cpus_read_lock)(); freq_invariance_enable(); + } } static void disable_freq_invariance_workfn(struct work_struct *work) diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 767bf1c71aadda1a63e9d8c43244b219294ac778..2a2fc14955cd3b2b0302486d82b542a280bd36c6 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -609,7 +609,7 @@ void mtrr_save_state(void) { int first_cpu; - if (!mtrr_enabled()) + if (!mtrr_enabled() || !mtrr_state.have_fixed) return; first_cpu = cpumask_first(cpu_online_mask); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 5358d43886adc5409196f3e68308658b05bf9872..fec38153355581215eb93b6301ae90b6f0bd06c5 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -51,13 +51,12 @@ DEFINE_ASM_FUNC(pv_native_irq_enable, "sti", .noinstr.text); DEFINE_ASM_FUNC(pv_native_read_cr2, "mov %cr2, %rax", .noinstr.text); #endif -DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); +DEFINE_STATIC_KEY_FALSE(virt_spin_lock_key); void __init native_pv_lock_init(void) { - if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && - !boot_cpu_has(X86_FEATURE_HYPERVISOR)) - static_branch_disable(&virt_spin_lock_key); + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) + static_branch_enable(&virt_spin_lock_key); } static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 5d34cad9b7b180fed4bd74f947039bc880f1fb53..6129dc2ba784cbdafb445e43479ca708f252f52a 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -164,7 +164,7 @@ unsigned long saved_video_mode; static char __initdata command_line[COMMAND_LINE_SIZE]; #ifdef CONFIG_CMDLINE_BOOL -static char __initdata builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; +char builtin_cmdline[COMMAND_LINE_SIZE] = CONFIG_CMDLINE; bool builtin_cmdline_added __ro_after_init; #endif diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4287a8071a3ab60dfda8c26bf06ca7c92bed4831..472a1537b7a977d0ddfa695e0e941fb417b88f94 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -141,8 +141,8 @@ config KVM_AMD_SEV depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) select ARCH_HAS_CC_PLATFORM select KVM_GENERIC_PRIVATE_MEM - select HAVE_KVM_GMEM_PREPARE - select HAVE_KVM_GMEM_INVALIDATE + select HAVE_KVM_ARCH_GMEM_PREPARE + select HAVE_KVM_ARCH_GMEM_INVALIDATE help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 923e64903da9afeeff80f76062c45bd5ab076717..913bfc96959cbfe7b6049b7323dfd1f2127f180b 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -286,7 +286,6 @@ static inline int kvm_hv_hypercall(struct kvm_vcpu *vcpu) return HV_STATUS_ACCESS_DENIED; } static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) {} -static inline void kvm_hv_free_pa_page(struct kvm *kvm) {} static inline bool kvm_hv_synic_has_vector(struct kvm_vcpu *vcpu, int vector) { return false; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a7172ba59ad2e7e95ae3c63f337310e8aa6cb083..5bb481aefcbcdc4c393ea0de917dc4ffc576002f 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -351,10 +351,8 @@ static void kvm_recalculate_logical_map(struct kvm_apic_map *new, * reversing the LDR calculation to get cluster of APICs, i.e. no * additional work is required. */ - if (apic_x2apic_mode(apic)) { - WARN_ON_ONCE(ldr != kvm_apic_calc_x2apic_ldr(kvm_x2apic_id(apic))); + if (apic_x2apic_mode(apic)) return; - } if (WARN_ON_ONCE(!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))) { @@ -1743,7 +1741,7 @@ static void limit_periodic_timer_frequency(struct kvm_lapic *apic) s64 min_period = min_timer_period_us * 1000LL; if (apic->lapic_timer.period < min_period) { - pr_info_ratelimited( + pr_info_once( "vcpu %i: requested %lld ns " "lapic timer period limited to %lld ns\n", apic->vcpu->vcpu_id, @@ -2966,18 +2964,28 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s, bool set) { if (apic_x2apic_mode(vcpu->arch.apic)) { + u32 x2apic_id = kvm_x2apic_id(vcpu->arch.apic); u32 *id = (u32 *)(s->regs + APIC_ID); u32 *ldr = (u32 *)(s->regs + APIC_LDR); u64 icr; if (vcpu->kvm->arch.x2apic_format) { - if (*id != vcpu->vcpu_id) + if (*id != x2apic_id) return -EINVAL; } else { + /* + * Ignore the userspace value when setting APIC state. + * KVM's model is that the x2APIC ID is readonly, e.g. + * KVM only supports delivering interrupts to KVM's + * version of the x2APIC ID. However, for backwards + * compatibility, don't reject attempts to set a + * mismatched ID for userspace that hasn't opted into + * x2apic_format. + */ if (set) - *id >>= 24; + *id = x2apic_id; else - *id <<= 24; + *id = x2apic_id << 24; } /* @@ -2986,7 +2994,7 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, * split to ICR+ICR2 in userspace for backwards compatibility. */ if (set) { - *ldr = kvm_apic_calc_x2apic_ldr(*id); + *ldr = kvm_apic_calc_x2apic_ldr(x2apic_id); icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) | (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 901be9e420a4c5de480a80ac181db03a6f1de045..928cf84778b0c5470b64c79132f4ecc9906891e0 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4335,7 +4335,7 @@ static u8 kvm_max_private_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, if (req_max_level) max_level = min(max_level, req_max_level); - return req_max_level; + return max_level; } static int kvm_faultin_pfn_private(struct kvm_vcpu *vcpu, @@ -4743,6 +4743,9 @@ long kvm_arch_vcpu_pre_fault_memory(struct kvm_vcpu *vcpu, u64 end; int r; + if (!vcpu->kvm->arch.pre_fault_allowed) + return -EOPNOTSUPP; + /* * reload is efficient when called repeatedly, so we can do it on * every iteration. @@ -7510,7 +7513,7 @@ static bool hugepage_has_attrs(struct kvm *kvm, struct kvm_memory_slot *slot, const unsigned long end = start + KVM_PAGES_PER_HPAGE(level); if (level == PG_LEVEL_2M) - return kvm_range_has_memory_attributes(kvm, start, end, attrs); + return kvm_range_has_memory_attributes(kvm, start, end, ~0, attrs); for (gfn = start; gfn < end; gfn += KVM_PAGES_PER_HPAGE(level - 1)) { if (hugepage_test_mixed(slot, gfn, level - 1) || diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index a16c873b32321802372d87dadd4c062e5bd55171..714c517dd4b72bda0879885c7d1a681f897e7ac2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2276,30 +2276,24 @@ static int sev_gmem_post_populate(struct kvm *kvm, gfn_t gfn_start, kvm_pfn_t pf for (gfn = gfn_start, i = 0; gfn < gfn_start + npages; gfn++, i++) { struct sev_data_snp_launch_update fw_args = {0}; - bool assigned; + bool assigned = false; int level; - if (!kvm_mem_is_private(kvm, gfn)) { - pr_debug("%s: Failed to ensure GFN 0x%llx has private memory attribute set\n", - __func__, gfn); - ret = -EINVAL; - goto err; - } - ret = snp_lookup_rmpentry((u64)pfn + i, &assigned, &level); if (ret || assigned) { pr_debug("%s: Failed to ensure GFN 0x%llx RMP entry is initial shared state, ret: %d assigned: %d\n", __func__, gfn, ret, assigned); - ret = -EINVAL; + ret = ret ? -EINVAL : -EEXIST; goto err; } if (src) { void *vaddr = kmap_local_pfn(pfn + i); - ret = copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE); - if (ret) + if (copy_from_user(vaddr, src + i * PAGE_SIZE, PAGE_SIZE)) { + ret = -EFAULT; goto err; + } kunmap_local(vaddr); } @@ -2549,6 +2543,14 @@ static int snp_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) data->gctx_paddr = __psp_pa(sev->snp_context); ret = sev_issue_cmd(kvm, SEV_CMD_SNP_LAUNCH_FINISH, data, &argp->error); + /* + * Now that there will be no more SNP_LAUNCH_UPDATE ioctls, private pages + * can be given to the guest simply by marking the RMP entry as private. + * This can happen on first access and also with KVM_PRE_FAULT_MEMORY. + */ + if (!ret) + kvm->arch.pre_fault_allowed = true; + kfree(id_auth); e_free_id_block: diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index c115d26844f7f6b737bd0423645da08875f7bb50..d6f252555ab3ff0c68ead0ab03013ec8782364ab 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4949,6 +4949,7 @@ static int svm_vm_init(struct kvm *kvm) to_kvm_sev_info(kvm)->need_init = true; kvm->arch.has_private_mem = (type == KVM_X86_SNP_VM); + kvm->arch.pre_fault_allowed = !kvm->arch.has_private_mem; } if (!pause_filter_count || !pause_filter_thresh) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index af6c8cf6a37a3584d1645965f44487b87869cab2..70219e4069874aae9d3c8f70890e3e2deb63a67c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -427,8 +427,7 @@ static void kvm_user_return_msr_cpu_online(void) int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); int err; value = (value & mask) | (msrs->values[slot].host & ~mask); @@ -450,8 +449,7 @@ EXPORT_SYMBOL_GPL(kvm_set_user_return_msr); static void drop_user_return_notifiers(void) { - unsigned int cpu = smp_processor_id(); - struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu); + struct kvm_user_return_msrs *msrs = this_cpu_ptr(user_return_msrs); if (msrs->registered) kvm_on_user_return(&msrs->urn); @@ -12646,6 +12644,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm->arch.vm_type = type; kvm->arch.has_private_mem = (type == KVM_X86_SW_PROTECTED_VM); + /* Decided by the vendor code for other VM types. */ + kvm->arch.pre_fault_allowed = + type == KVM_X86_DEFAULT_VM || type == KVM_X86_SW_PROTECTED_VM; ret = kvm_page_track_init(kvm); if (ret) @@ -13641,19 +13642,14 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm) -{ - return kvm->arch.vm_type == KVM_X86_SNP_VM; -} - +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order) { return kvm_x86_call(gmem_prepare)(kvm, pfn, gfn, max_order); } #endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) { kvm_x86_call(gmem_invalidate)(start, end); diff --git a/arch/x86/lib/cmdline.c b/arch/x86/lib/cmdline.c index 384da1fdd5c6f762135a0676de87874405576917..c65cd55504549acd29ebe1ed20a682c9484c5259 100644 --- a/arch/x86/lib/cmdline.c +++ b/arch/x86/lib/cmdline.c @@ -207,18 +207,29 @@ __cmdline_find_option(const char *cmdline, int max_cmdline_size, int cmdline_find_option_bool(const char *cmdline, const char *option) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; - return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + ret = __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option_bool(builtin_cmdline, COMMAND_LINE_SIZE, option); + + return ret; } int cmdline_find_option(const char *cmdline, const char *option, char *buffer, int bufsize) { - if (IS_ENABLED(CONFIG_CMDLINE_BOOL)) - WARN_ON_ONCE(!builtin_cmdline_added); + int ret; + + ret = __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); + if (ret > 0) + return ret; + + if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && !builtin_cmdline_added) + return __cmdline_find_option(builtin_cmdline, COMMAND_LINE_SIZE, option, buffer, bufsize); - return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option, - buffer, bufsize); + return ret; } diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S index a314622aa09346306db47689b4cb21e8d0313385..d066aecf8aeb22d4f6b01e8fe18a7101723e6491 100644 --- a/arch/x86/lib/getuser.S +++ b/arch/x86/lib/getuser.S @@ -88,12 +88,14 @@ SYM_FUNC_END(__get_user_4) EXPORT_SYMBOL(__get_user_4) SYM_FUNC_START(__get_user_8) +#ifndef CONFIG_X86_64 + xor %ecx,%ecx +#endif check_range size=8 ASM_STAC #ifdef CONFIG_X86_64 UACCESS movq (%_ASM_AX),%rdx #else - xor %ecx,%ecx UACCESS movl (%_ASM_AX),%edx UACCESS movl 4(%_ASM_AX),%ecx #endif diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 2e69abf4f852abb7d8e29c7673dc82f685cf7b89..851ec8f1363a8b389ea4579cc68bf3300a4df27c 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -241,7 +241,7 @@ static pmd_t *pti_user_pagetable_walk_pmd(unsigned long address) * * Returns a pointer to a PTE on success, or NULL on failure. */ -static pte_t *pti_user_pagetable_walk_pte(unsigned long address) +static pte_t *pti_user_pagetable_walk_pte(unsigned long address, bool late_text) { gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO); pmd_t *pmd; @@ -251,10 +251,15 @@ static pte_t *pti_user_pagetable_walk_pte(unsigned long address) if (!pmd) return NULL; - /* We can't do anything sensible if we hit a large mapping. */ + /* Large PMD mapping found */ if (pmd_leaf(*pmd)) { - WARN_ON(1); - return NULL; + /* Clear the PMD if we hit a large mapping from the first round */ + if (late_text) { + set_pmd(pmd, __pmd(0)); + } else { + WARN_ON_ONCE(1); + return NULL; + } } if (pmd_none(*pmd)) { @@ -283,7 +288,7 @@ static void __init pti_setup_vsyscall(void) if (!pte || WARN_ON(level != PG_LEVEL_4K) || pte_none(*pte)) return; - target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR); + target_pte = pti_user_pagetable_walk_pte(VSYSCALL_ADDR, false); if (WARN_ON(!target_pte)) return; @@ -301,7 +306,7 @@ enum pti_clone_level { static void pti_clone_pgtable(unsigned long start, unsigned long end, - enum pti_clone_level level) + enum pti_clone_level level, bool late_text) { unsigned long addr; @@ -374,14 +379,14 @@ pti_clone_pgtable(unsigned long start, unsigned long end, */ *target_pmd = *pmd; - addr += PMD_SIZE; + addr = round_up(addr + 1, PMD_SIZE); } else if (level == PTI_CLONE_PTE) { /* Walk the page-table down to the pte level */ pte = pte_offset_kernel(pmd, addr); if (pte_none(*pte)) { - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); continue; } @@ -390,7 +395,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, return; /* Allocate PTE in the user page-table */ - target_pte = pti_user_pagetable_walk_pte(addr); + target_pte = pti_user_pagetable_walk_pte(addr, late_text); if (WARN_ON(!target_pte)) return; @@ -401,7 +406,7 @@ pti_clone_pgtable(unsigned long start, unsigned long end, /* Clone the PTE */ *target_pte = *pte; - addr += PAGE_SIZE; + addr = round_up(addr + 1, PAGE_SIZE); } else { BUG(); @@ -452,7 +457,7 @@ static void __init pti_clone_user_shared(void) phys_addr_t pa = per_cpu_ptr_to_phys((void *)va); pte_t *target_pte; - target_pte = pti_user_pagetable_walk_pte(va); + target_pte = pti_user_pagetable_walk_pte(va, false); if (WARN_ON(!target_pte)) return; @@ -475,7 +480,7 @@ static void __init pti_clone_user_shared(void) start = CPU_ENTRY_AREA_BASE; end = start + (PAGE_SIZE * CPU_ENTRY_AREA_PAGES); - pti_clone_pgtable(start, end, PTI_CLONE_PMD); + pti_clone_pgtable(start, end, PTI_CLONE_PMD, false); } #endif /* CONFIG_X86_64 */ @@ -492,11 +497,11 @@ static void __init pti_setup_espfix64(void) /* * Clone the populated PMDs of the entry text and force it RO. */ -static void pti_clone_entry_text(void) +static void pti_clone_entry_text(bool late) { pti_clone_pgtable((unsigned long) __entry_text_start, (unsigned long) __entry_text_end, - PTI_CLONE_PMD); + PTI_LEVEL_KERNEL_IMAGE, late); } /* @@ -571,7 +576,7 @@ static void pti_clone_kernel_text(void) * pti_set_kernel_image_nonglobal() did to clear the * global bit. */ - pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE); + pti_clone_pgtable(start, end_clone, PTI_LEVEL_KERNEL_IMAGE, false); /* * pti_clone_pgtable() will set the global bit in any PMDs @@ -638,8 +643,15 @@ void __init pti_init(void) /* Undo all global bits from the init pagetables in head_64.S: */ pti_set_kernel_image_nonglobal(); + /* Replace some of the global bits just for shared entry text: */ - pti_clone_entry_text(); + /* + * This is very early in boot. Device and Late initcalls can do + * modprobe before free_initmem() and mark_readonly(). This + * pti_clone_entry_text() allows those user-mode-helpers to function, + * but notably the text is still RW. + */ + pti_clone_entry_text(false); pti_setup_espfix64(); pti_setup_vsyscall(); } @@ -656,10 +668,11 @@ void pti_finalize(void) if (!boot_cpu_has(X86_FEATURE_PTI)) return; /* - * We need to clone everything (again) that maps parts of the - * kernel image. + * This is after free_initmem() (all initcalls are done) and we've done + * mark_readonly(). Text is now NX which might've split some PMDs + * relative to the early clone. */ - pti_clone_entry_text(); + pti_clone_entry_text(true); pti_clone_kernel_text(); debug_checkwx_user(); diff --git a/block/blk-lib.c b/block/blk-lib.c index 9f735efa6c9459e03f14298382f1c37d31f3b6ab..83eb7761c2bfbd5ef9791e241973b07de7e0b46b 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -111,13 +111,20 @@ static sector_t bio_write_zeroes_limit(struct block_device *bdev) (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask); } +/* + * There is no reliable way for the SCSI subsystem to determine whether a + * device supports a WRITE SAME operation without actually performing a write + * to media. As a result, write_zeroes is enabled by default and will be + * disabled if a zeroing operation subsequently fails. This means that this + * queue limit is likely to change at runtime. + */ static void __blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, - struct bio **biop, unsigned flags) + struct bio **biop, unsigned flags, sector_t limit) { + while (nr_sects) { - unsigned int len = min_t(sector_t, nr_sects, - bio_write_zeroes_limit(bdev)); + unsigned int len = min(nr_sects, limit); struct bio *bio; if ((flags & BLKDEV_ZERO_KILLABLE) && @@ -141,12 +148,14 @@ static void __blkdev_issue_write_zeroes(struct block_device *bdev, static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp, unsigned flags) { + sector_t limit = bio_write_zeroes_limit(bdev); struct bio *bio = NULL; struct blk_plug plug; int ret = 0; blk_start_plug(&plug); - __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags); + __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, + flags, limit); if (bio) { if ((flags & BLKDEV_ZERO_KILLABLE) && fatal_signal_pending(current)) { @@ -165,7 +174,7 @@ static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector, * on an I/O error, in which case we'll turn any error into * "not supported" here. */ - if (ret && !bdev_write_zeroes_sectors(bdev)) + if (ret && !limit) return -EOPNOTSUPP; return ret; } @@ -265,12 +274,14 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, unsigned flags) { + sector_t limit = bio_write_zeroes_limit(bdev); + if (bdev_read_only(bdev)) return -EPERM; - if (bdev_write_zeroes_sectors(bdev)) { + if (limit) { __blkdev_issue_write_zeroes(bdev, sector, nr_sects, - gfp_mask, biop, flags); + gfp_mask, biop, flags, limit); } else { if (flags & BLKDEV_ZERO_NOFALLBACK) return -EOPNOTSUPP; diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c index cc57e2dd9a0bb3cf0f6443a29800f6a76120bc5e..2cafcf11ee8bee00910c539db74234315272aa7b 100644 --- a/block/blk-mq-tag.c +++ b/block/blk-mq-tag.c @@ -38,6 +38,7 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags, void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) { unsigned int users; + unsigned long flags; struct blk_mq_tags *tags = hctx->tags; /* @@ -56,11 +57,11 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx) return; } - spin_lock_irq(&tags->lock); + spin_lock_irqsave(&tags->lock, flags); users = tags->active_queues + 1; WRITE_ONCE(tags->active_queues, users); blk_mq_update_wake_batch(tags, users); - spin_unlock_irq(&tags->lock); + spin_unlock_irqrestore(&tags->lock, flags); } /* diff --git a/block/blk-throttle.c b/block/blk-throttle.c index dc6140fa3de06173d141c5de40825917f0dee16a..6943ec720f39ff9c82eb0ce7b76ce99d37df756e 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -31,14 +31,6 @@ static struct workqueue_struct *kthrotld_workqueue; #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) -/* We measure latency for request size from <= 4k to >= 1M */ -#define LATENCY_BUCKET_SIZE 9 - -struct latency_bucket { - unsigned long total_latency; /* ns / 1024 */ - int samples; -}; - struct throtl_data { /* service tree for active throtl groups */ @@ -116,9 +108,6 @@ static unsigned int tg_iops_limit(struct throtl_grp *tg, int rw) return tg->iops[rw]; } -#define request_bucket_index(sectors) \ - clamp_t(int, order_base_2(sectors) - 3, 0, LATENCY_BUCKET_SIZE - 1) - /** * throtl_log - log debug message via blktrace * @sq: the service_queue being reported diff --git a/drivers/accessibility/speakup/genmap.c b/drivers/accessibility/speakup/genmap.c index 0125000e00d9ab5761fba4b2c412a662b1f6b76a..0882bab10fb87a47359476de559d47b884de1623 100644 --- a/drivers/accessibility/speakup/genmap.c +++ b/drivers/accessibility/speakup/genmap.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "utils.h" diff --git a/drivers/accessibility/speakup/makemapdata.c b/drivers/accessibility/speakup/makemapdata.c index d7d41bb9b05fbb590c436aba29e6a94b2b9af3d0..55e4ef8a93dc99ca3bcbb5416d97fb74ffd0b99f 100644 --- a/drivers/accessibility/speakup/makemapdata.c +++ b/drivers/accessibility/speakup/makemapdata.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include "utils.h" diff --git a/drivers/acpi/acpica/acevents.h b/drivers/acpi/acpica/acevents.h index 2133085deda77f92801488ca59c5ae70b762023f..1c5218b79fc2acb3a6e1616e8b0ce2a9ac357ca7 100644 --- a/drivers/acpi/acpica/acevents.h +++ b/drivers/acpi/acpica/acevents.h @@ -188,13 +188,9 @@ acpi_ev_detach_region(union acpi_operand_object *region_obj, u8 acpi_ns_is_locked); void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function); -void -acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *node, - acpi_adr_space_type space_id); - acpi_status acpi_ev_execute_reg_method(union acpi_operand_object *region_obj, u32 function); diff --git a/drivers/acpi/acpica/evregion.c b/drivers/acpi/acpica/evregion.c index dc6004daf624b6f997cf8b664c7ad0616b05931f..cf53b9535f18e00730781769d1ec54874f440ac7 100644 --- a/drivers/acpi/acpica/evregion.c +++ b/drivers/acpi/acpica/evregion.c @@ -20,6 +20,10 @@ extern u8 acpi_gbl_default_address_spaces[]; /* Local prototypes */ +static void +acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, + acpi_adr_space_type space_id); + static acpi_status acpi_ev_reg_run(acpi_handle obj_handle, u32 level, void *context, void **return_value); @@ -61,6 +65,7 @@ acpi_status acpi_ev_initialize_op_regions(void) acpi_gbl_default_address_spaces [i])) { acpi_ev_execute_reg_methods(acpi_gbl_root_node, + ACPI_UINT32_MAX, acpi_gbl_default_address_spaces [i], ACPI_REG_CONNECT); } @@ -668,6 +673,7 @@ cleanup1: * FUNCTION: acpi_ev_execute_reg_methods * * PARAMETERS: node - Namespace node for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * function - Passed to _REG: On (1) or Off (0) * @@ -679,7 +685,7 @@ cleanup1: ******************************************************************************/ void -acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, +acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, u32 max_depth, acpi_adr_space_type space_id, u32 function) { struct acpi_reg_walk_info info; @@ -713,7 +719,7 @@ acpi_ev_execute_reg_methods(struct acpi_namespace_node *node, * regions and _REG methods. (i.e. handlers must be installed for all * regions of this Space ID before we can run any _REG methods) */ - (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, ACPI_UINT32_MAX, + (void)acpi_ns_walk_namespace(ACPI_TYPE_ANY, node, max_depth, ACPI_NS_WALK_UNLOCK, acpi_ev_reg_run, NULL, &info, NULL); @@ -814,7 +820,7 @@ acpi_ev_reg_run(acpi_handle obj_handle, * ******************************************************************************/ -void +static void acpi_ev_execute_orphan_reg_method(struct acpi_namespace_node *device_node, acpi_adr_space_type space_id) { diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 624361a5f34d8652d205c8f6f4d9d3720ff89edd..95f78383bbdba16bab4d0a2022c87780cf6615bb 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -85,7 +85,8 @@ acpi_install_address_space_handler_internal(acpi_handle device, /* Run all _REG methods for this address space */ if (run_reg) { - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, ACPI_UINT32_MAX, space_id, + ACPI_REG_CONNECT); } unlock_and_exit: @@ -263,6 +264,7 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * FUNCTION: acpi_execute_reg_methods * * PARAMETERS: device - Handle for the device + * max_depth - Depth to which search for _REG * space_id - The address space ID * * RETURN: Status @@ -271,7 +273,8 @@ ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) * ******************************************************************************/ acpi_status -acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) +acpi_execute_reg_methods(acpi_handle device, u32 max_depth, + acpi_adr_space_type space_id) { struct acpi_namespace_node *node; acpi_status status; @@ -296,7 +299,8 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) /* Run all _REG methods for this address space */ - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + acpi_ev_execute_reg_methods(node, max_depth, space_id, + ACPI_REG_CONNECT); } else { status = AE_BAD_PARAMETER; } @@ -306,57 +310,3 @@ acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) } ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) - -/******************************************************************************* - * - * FUNCTION: acpi_execute_orphan_reg_method - * - * PARAMETERS: device - Handle for the device - * space_id - The address space ID - * - * RETURN: Status - * - * DESCRIPTION: Execute an "orphan" _REG method that appears under an ACPI - * device. This is a _REG method that has no corresponding region - * within the device's scope. - * - ******************************************************************************/ -acpi_status -acpi_execute_orphan_reg_method(acpi_handle device, acpi_adr_space_type space_id) -{ - struct acpi_namespace_node *node; - acpi_status status; - - ACPI_FUNCTION_TRACE(acpi_execute_orphan_reg_method); - - /* Parameter validation */ - - if (!device) { - return_ACPI_STATUS(AE_BAD_PARAMETER); - } - - status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); - if (ACPI_FAILURE(status)) { - return_ACPI_STATUS(status); - } - - /* Convert and validate the device handle */ - - node = acpi_ns_validate_handle(device); - if (node) { - - /* - * If an "orphan" _REG method is present in the device's scope - * for the given address space ID, run it. - */ - - acpi_ev_execute_orphan_reg_method(node, space_id); - } else { - status = AE_BAD_PARAMETER; - } - - (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); - return_ACPI_STATUS(status); -} - -ACPI_EXPORT_SYMBOL(acpi_execute_orphan_reg_method) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 299ec653388ceb8fa3977bb0d88151e3055a1211..38d2f6e6b12b4f7617ce7551deeeb2facb5fc2ed 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -1487,12 +1487,13 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, bool call_reg) { - acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; acpi_status status; acpi_ec_start(ec, false); if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { + acpi_handle scope_handle = ec == first_ec ? ACPI_ROOT_OBJECT : ec->handle; + acpi_ec_enter_noirq(ec); status = acpi_install_address_space_handler_no_reg(scope_handle, ACPI_ADR_SPACE_EC, @@ -1506,10 +1507,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, } if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { - acpi_execute_reg_methods(scope_handle, ACPI_ADR_SPACE_EC); - if (scope_handle != ec->handle) - acpi_execute_orphan_reg_method(ec->handle, ACPI_ADR_SPACE_EC); - + acpi_execute_reg_methods(ec->handle, ACPI_UINT32_MAX, ACPI_ADR_SPACE_EC); set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } @@ -1724,6 +1722,12 @@ static void acpi_ec_remove(struct acpi_device *device) } } +void acpi_ec_register_opregions(struct acpi_device *adev) +{ + if (first_ec && first_ec->handle != adev->handle) + acpi_execute_reg_methods(adev->handle, 1, ACPI_ADR_SPACE_EC); +} + static acpi_status ec_parse_io_ports(struct acpi_resource *resource, void *context) { diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 601b670356e50e141b172266cf0d3478978a17c4..aadd4c218b320e27a39e0047c11eded1b888dd73 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -223,6 +223,7 @@ int acpi_ec_add_query_handler(struct acpi_ec *ec, u8 query_bit, acpi_handle handle, acpi_ec_query_func func, void *data); void acpi_ec_remove_query_handler(struct acpi_ec *ec, u8 query_bit); +void acpi_ec_register_opregions(struct acpi_device *adev); #ifdef CONFIG_PM_SLEEP void acpi_ec_flush_work(void); diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 59771412686ba3613f7a280aeb6803ec2d155df8..22ae7829a9155de65c7154647f0c919f8f050073 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -2273,6 +2273,8 @@ static int acpi_bus_attach(struct acpi_device *device, void *first_pass) if (device->handler) goto ok; + acpi_ec_register_opregions(device); + if (!device->flags.initialized) { device->flags.power_manageable = device->power.states[ACPI_STATE_D0].flags.valid; diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c index c11cbe5b6eaa6cfc165aed9d0bad0a812a072973..674b9db7a1ef8b021cccf06fa8db6a21a0c0b51b 100644 --- a/drivers/acpi/video_detect.c +++ b/drivers/acpi/video_detect.c @@ -54,6 +54,8 @@ static void acpi_video_parse_cmdline(void) acpi_backlight_cmdline = acpi_backlight_nvidia_wmi_ec; if (!strcmp("apple_gmux", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_apple_gmux; + if (!strcmp("dell_uart", acpi_video_backlight_string)) + acpi_backlight_cmdline = acpi_backlight_dell_uart; if (!strcmp("none", acpi_video_backlight_string)) acpi_backlight_cmdline = acpi_backlight_none; } @@ -821,6 +823,21 @@ static const struct dmi_system_id video_detect_dmi_table[] = { }, }, + /* + * Dell AIO (All in Ones) which advertise an UART attached backlight + * controller board in their ACPI tables (and may even have one), but + * which need native backlight control nevertheless. + */ + { + /* https://bugzilla.redhat.com/show_bug.cgi?id=2303936 */ + .callback = video_detect_force_native, + /* Dell OptiPlex 7760 AIO */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 7760 AIO"), + }, + }, + /* * Models which have nvidia-ec-wmi support, but should not use it. * Note this indicates a likely firmware bug on these models and should @@ -918,6 +935,7 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto static DEFINE_MUTEX(init_mutex); static bool nvidia_wmi_ec_present; static bool apple_gmux_present; + static bool dell_uart_present; static bool native_available; static bool init_done; static long video_caps; @@ -932,6 +950,7 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto &video_caps, NULL); nvidia_wmi_ec_present = nvidia_wmi_ec_supported(); apple_gmux_present = apple_gmux_detect(NULL, NULL); + dell_uart_present = acpi_dev_present("DELL0501", NULL, -1); init_done = true; } if (native) @@ -962,6 +981,9 @@ enum acpi_backlight_type __acpi_video_get_backlight_type(bool native, bool *auto if (apple_gmux_present) return acpi_backlight_apple_gmux; + if (dell_uart_present) + return acpi_backlight_dell_uart; + /* Use ACPI video if available, except when native should be preferred. */ if ((video_caps & ACPI_VIDEO_BACKLIGHT) && !(native_available && prefer_native_over_acpi_video())) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index f26286e3713ec73388c92ecaa579abc361a617a5..905290c98c3cdb60f77cdf7d30afa03af9a73e29 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -1044,13 +1044,13 @@ static struct binder_ref *binder_get_ref_olocked(struct binder_proc *proc, } /* Find the smallest unused descriptor the "slow way" */ -static u32 slow_desc_lookup_olocked(struct binder_proc *proc) +static u32 slow_desc_lookup_olocked(struct binder_proc *proc, u32 offset) { struct binder_ref *ref; struct rb_node *n; u32 desc; - desc = 1; + desc = offset; for (n = rb_first(&proc->refs_by_desc); n; n = rb_next(n)) { ref = rb_entry(n, struct binder_ref, rb_node_desc); if (ref->data.desc > desc) @@ -1071,21 +1071,18 @@ static int get_ref_desc_olocked(struct binder_proc *proc, u32 *desc) { struct dbitmap *dmap = &proc->dmap; + unsigned int nbits, offset; unsigned long *new, bit; - unsigned int nbits; /* 0 is reserved for the context manager */ - if (node == proc->context->binder_context_mgr_node) { - *desc = 0; - return 0; - } + offset = (node == proc->context->binder_context_mgr_node) ? 0 : 1; if (!dbitmap_enabled(dmap)) { - *desc = slow_desc_lookup_olocked(proc); + *desc = slow_desc_lookup_olocked(proc, offset); return 0; } - if (dbitmap_acquire_first_zero_bit(dmap, &bit) == 0) { + if (dbitmap_acquire_next_zero_bit(dmap, offset, &bit) == 0) { *desc = bit; return 0; } diff --git a/drivers/android/binder_alloc.c b/drivers/android/binder_alloc.c index b00961944ab1d266e55ea2f5d476b22318af24a7..b3acbc4174fb1f5af03e81707c120eeb298f9203 100644 --- a/drivers/android/binder_alloc.c +++ b/drivers/android/binder_alloc.c @@ -939,9 +939,9 @@ void binder_alloc_deferred_release(struct binder_alloc *alloc) __free_page(alloc->pages[i].page_ptr); page_count++; } - kvfree(alloc->pages); } spin_unlock(&alloc->lock); + kvfree(alloc->pages); if (alloc->mm) mmdrop(alloc->mm); diff --git a/drivers/android/dbitmap.h b/drivers/android/dbitmap.h index b8ac7b4764fd8d4e3cf4fa85de5b01f3d6dbafb3..956f1bd087d1c53b6d5c62d1409411b3eab7e5e6 100644 --- a/drivers/android/dbitmap.h +++ b/drivers/android/dbitmap.h @@ -6,8 +6,7 @@ * * Used by the binder driver to optimize the allocation of the smallest * available descriptor ID. Each bit in the bitmap represents the state - * of an ID, with the exception of BIT(0) which is used exclusively to - * reference binder's context manager. + * of an ID. * * A dbitmap can grow or shrink as needed. This part has been designed * considering that users might need to briefly release their locks in @@ -58,11 +57,7 @@ static inline unsigned int dbitmap_shrink_nbits(struct dbitmap *dmap) if (bit < (dmap->nbits >> 2)) return dmap->nbits >> 1; - /* - * Note that find_last_bit() returns dmap->nbits when no bits - * are set. While this is technically not possible here since - * BIT(0) is always set, this check is left for extra safety. - */ + /* find_last_bit() returns dmap->nbits when no bits are set. */ if (bit == dmap->nbits) return NBITS_MIN; @@ -132,16 +127,17 @@ dbitmap_grow(struct dbitmap *dmap, unsigned long *new, unsigned int nbits) } /* - * Finds and sets the first zero bit in the bitmap. Upon success @bit + * Finds and sets the next zero bit in the bitmap. Upon success @bit * is populated with the index and 0 is returned. Otherwise, -ENOSPC * is returned to indicate that a dbitmap_grow() is needed. */ static inline int -dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) +dbitmap_acquire_next_zero_bit(struct dbitmap *dmap, unsigned long offset, + unsigned long *bit) { unsigned long n; - n = find_first_zero_bit(dmap->map, dmap->nbits); + n = find_next_zero_bit(dmap->map, dmap->nbits, offset); if (n == dmap->nbits) return -ENOSPC; @@ -154,9 +150,7 @@ dbitmap_acquire_first_zero_bit(struct dbitmap *dmap, unsigned long *bit) static inline void dbitmap_clear_bit(struct dbitmap *dmap, unsigned long bit) { - /* BIT(0) should always set for the context manager */ - if (bit) - clear_bit(bit, dmap->map); + clear_bit(bit, dmap->map); } static inline int dbitmap_init(struct dbitmap *dmap) @@ -168,8 +162,6 @@ static inline int dbitmap_init(struct dbitmap *dmap) } dmap->nbits = NBITS_MIN; - /* BIT(0) is reserved for the context manager */ - set_bit(0, dmap->map); return 0; } diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c index d6f5e25e1ed8940203714e535ea6920f8c680f3e..473e00a58a8b0c1ed67fb808670219f60efbfa6c 100644 --- a/drivers/ata/libata-scsi.c +++ b/drivers/ata/libata-scsi.c @@ -951,8 +951,19 @@ static void ata_gen_passthru_sense(struct ata_queued_cmd *qc) &sense_key, &asc, &ascq); ata_scsi_set_sense(qc->dev, cmd, sense_key, asc, ascq); } else { - /* ATA PASS-THROUGH INFORMATION AVAILABLE */ - ata_scsi_set_sense(qc->dev, cmd, RECOVERED_ERROR, 0, 0x1D); + /* + * ATA PASS-THROUGH INFORMATION AVAILABLE + * + * Note: we are supposed to call ata_scsi_set_sense(), which + * respects the D_SENSE bit, instead of unconditionally + * generating the sense data in descriptor format. However, + * because hdparm, hddtemp, and udisks incorrectly assume sense + * data in descriptor format, without even looking at the + * RESPONSE CODE field in the returned sense data (to see which + * format the returned sense data is in), we are stuck with + * being bug compatible with older kernels. + */ + scsi_build_sense(cmd, 1, RECOVERED_ERROR, 0, 0x1D); } } diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c index 1b85e8bf4ef91b1f425ee512d6fd16fd363e4b73..f2f36e55a1f4d25b06e8467a949b3d1f5460fd37 100644 --- a/drivers/ata/pata_macio.c +++ b/drivers/ata/pata_macio.c @@ -208,6 +208,19 @@ static const char* macio_ata_names[] = { /* Don't let a DMA segment go all the way to 64K */ #define MAX_DBDMA_SEG 0xff00 +#ifdef CONFIG_PAGE_SIZE_64KB +/* + * The SCSI core requires the segment size to cover at least a page, so + * for 64K page size kernels it must be at least 64K. However the + * hardware can't handle 64K, so pata_macio_qc_prep() will split large + * requests. To handle the split requests the tablesize must be halved. + */ +#define PATA_MACIO_MAX_SEGMENT_SIZE SZ_64K +#define PATA_MACIO_SG_TABLESIZE (MAX_DCMDS / 2) +#else +#define PATA_MACIO_MAX_SEGMENT_SIZE MAX_DBDMA_SEG +#define PATA_MACIO_SG_TABLESIZE MAX_DCMDS +#endif /* * Wait 1s for disk to answer on IDE bus after a hard reset @@ -541,7 +554,8 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) while (sg_len) { /* table overflow should never happen */ - BUG_ON (pi++ >= MAX_DCMDS); + if (WARN_ON_ONCE(pi >= MAX_DCMDS)) + return AC_ERR_SYSTEM; len = (sg_len < MAX_DBDMA_SEG) ? sg_len : MAX_DBDMA_SEG; table->command = cpu_to_le16(write ? OUTPUT_MORE: INPUT_MORE); @@ -553,11 +567,13 @@ static enum ata_completion_errors pata_macio_qc_prep(struct ata_queued_cmd *qc) addr += len; sg_len -= len; ++table; + ++pi; } } /* Should never happen according to Tejun */ - BUG_ON(!pi); + if (WARN_ON_ONCE(!pi)) + return AC_ERR_SYSTEM; /* Convert the last command to an input/output */ table--; @@ -912,16 +928,10 @@ static int pata_macio_do_resume(struct pata_macio_priv *priv) static const struct scsi_host_template pata_macio_sht = { __ATA_BASE_SHT(DRV_NAME), - .sg_tablesize = MAX_DCMDS, + .sg_tablesize = PATA_MACIO_SG_TABLESIZE, /* We may not need that strict one */ .dma_boundary = ATA_DMA_BOUNDARY, - /* - * The SCSI core requires the segment size to cover at least a page, so - * for 64K page size kernels this must be at least 64K. However the - * hardware can't handle 64K, so pata_macio_qc_prep() will split large - * requests. - */ - .max_segment_size = SZ_64K, + .max_segment_size = PATA_MACIO_MAX_SEGMENT_SIZE, .device_configure = pata_macio_device_configure, .sdev_groups = ata_common_sdev_groups, .can_queue = ATA_DEF_QUEUE, diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index e7f713cd70d3fd7a413c2568b8dca8f8cc8ba2c4..a876024d8a05f964fb79fb04bd5447b67f28f20c 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -1118,8 +1118,8 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) rpp->len += skb->len; if (stat & SAR_RSQE_EPDU) { + unsigned int len, truesize; unsigned char *l1l2; - unsigned int len; l1l2 = (unsigned char *) ((unsigned long) skb->data + skb->len - 6); @@ -1189,14 +1189,15 @@ dequeue_rx(struct idt77252_dev *card, struct rsq_entry *rsqe) ATM_SKB(skb)->vcc = vcc; __net_timestamp(skb); + truesize = skb->truesize; vcc->push(vcc, skb); atomic_inc(&vcc->stats->rx); - if (skb->truesize > SAR_FB_SIZE_3) + if (truesize > SAR_FB_SIZE_3) add_rx_skb(card, 3, SAR_FB_SIZE_3, 1); - else if (skb->truesize > SAR_FB_SIZE_2) + else if (truesize > SAR_FB_SIZE_2) add_rx_skb(card, 2, SAR_FB_SIZE_2, 1); - else if (skb->truesize > SAR_FB_SIZE_1) + else if (truesize > SAR_FB_SIZE_1) add_rx_skb(card, 1, SAR_FB_SIZE_1, 1); else add_rx_skb(card, 0, SAR_FB_SIZE_0, 1); diff --git a/drivers/base/core.c b/drivers/base/core.c index 730cae66607c02530d456b61344d431dc313c031..8c0733d3aad8e96c7d25394cd352cae67d06d3a0 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -2640,6 +2641,7 @@ static const char *dev_uevent_name(const struct kobject *kobj) static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) { const struct device *dev = kobj_to_dev(kobj); + struct device_driver *driver; int retval = 0; /* add device node properties if present */ @@ -2668,8 +2670,12 @@ static int dev_uevent(const struct kobject *kobj, struct kobj_uevent_env *env) if (dev->type && dev->type->name) add_uevent_var(env, "DEVTYPE=%s", dev->type->name); - if (dev->driver) - add_uevent_var(env, "DRIVER=%s", dev->driver->name); + /* Synchronize with module_remove_driver() */ + rcu_read_lock(); + driver = READ_ONCE(dev->driver); + if (driver) + add_uevent_var(env, "DRIVER=%s", driver->name); + rcu_read_unlock(); /* Add common DT information about the device */ of_device_uevent(dev, env); @@ -2739,11 +2745,8 @@ static ssize_t uevent_show(struct device *dev, struct device_attribute *attr, if (!env) return -ENOMEM; - /* Synchronize with really_probe() */ - device_lock(dev); /* let the kset specific function add its keys */ retval = kset->uevent_ops->uevent(&dev->kobj, env); - device_unlock(dev); if (retval) goto out; diff --git a/drivers/base/module.c b/drivers/base/module.c index 7af224e6914a104744977c0ac09dc2790166f654..f742ad2a21da021c6d86765febe4ccb51f8c548d 100644 --- a/drivers/base/module.c +++ b/drivers/base/module.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "base.h" static char *make_driver_name(const struct device_driver *drv) @@ -97,6 +98,9 @@ void module_remove_driver(const struct device_driver *drv) if (!drv) return; + /* Synchronize with dev_uevent() */ + synchronize_rcu(); + sysfs_remove_link(&drv->p->kobj, "module"); if (drv->owner) diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index 90a94a111e67e18d53c0cc98e5e3499055a27840..769fa288179d3969f45148a3ced9ee2e00650d18 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -413,6 +413,7 @@ config BT_ATH3K config BT_MTKSDIO tristate "MediaTek HCI SDIO driver" depends on MMC + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI SDIO driver. @@ -425,6 +426,7 @@ config BT_MTKSDIO config BT_MTKUART tristate "MediaTek HCI UART driver" depends on SERIAL_DEV_BUS + depends on USB || !BT_HCIBTUSB_MTK select BT_MTK help MediaTek Bluetooth HCI UART driver. diff --git a/drivers/bluetooth/btintel.c b/drivers/bluetooth/btintel.c index e7a612504ab1d33908521be39790492dd1fb4e40..7d5e4de64e3cec931af76cf197d506bee8eebe67 100644 --- a/drivers/bluetooth/btintel.c +++ b/drivers/bluetooth/btintel.c @@ -2945,9 +2945,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) INTEL_ROM_LEGACY_NO_WBS_SUPPORT)) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (ver.hw_variant == 0x08 && ver.fw_variant == 0x22) - set_bit(HCI_QUIRK_VALID_LE_STATES, - &hdev->quirks); err = btintel_legacy_rom_setup(hdev, &ver); break; @@ -2956,7 +2953,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) case 0x12: /* ThP */ case 0x13: /* HrP */ case 0x14: /* CcP */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); fallthrough; case 0x0c: /* WsP */ /* Apply the device specific HCI quirks @@ -3048,9 +3044,6 @@ static int btintel_setup_combined(struct hci_dev *hdev) /* These variants don't seem to support LE Coded PHY */ set_bit(HCI_QUIRK_BROKEN_LE_CODED, &hdev->quirks); - /* Set Valid LE States quirk */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, ver.hw_variant); @@ -3076,15 +3069,15 @@ static int btintel_setup_combined(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); btintel_set_dsm_reset_method(hdev, &ver_tlv); err = btintel_bootloader_setup_tlv(hdev, &ver_tlv); + if (err) + goto exit_error; + btintel_register_devcoredump_support(hdev); btintel_print_fseq_info(hdev); break; diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c index 0d1a0415557b2d3893c64c5b8c39fe5856b8e677..1c7631f22c522b5054f9740bbfbb9f7e06a928a9 100644 --- a/drivers/bluetooth/btintel_pcie.c +++ b/drivers/bluetooth/btintel_pcie.c @@ -1180,9 +1180,6 @@ static int btintel_pcie_setup(struct hci_dev *hdev) */ set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - /* Apply LE States quirk from solar onwards */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - /* Setup MSFT Extension support */ btintel_set_msft_opcode(hdev, INTEL_HW_VARIANT(ver_tlv.cnvi_bt)); diff --git a/drivers/bluetooth/btmtk.c b/drivers/bluetooth/btmtk.c index b7c348687a773f85e7667b7127fd6442c408d8ce..2b7c80043aa2ebd4c525d78e77af423df142d7cc 100644 --- a/drivers/bluetooth/btmtk.c +++ b/drivers/bluetooth/btmtk.c @@ -437,6 +437,7 @@ int btmtk_process_coredump(struct hci_dev *hdev, struct sk_buff *skb) } EXPORT_SYMBOL_GPL(btmtk_process_coredump); +#if IS_ENABLED(CONFIG_BT_HCIBTUSB_MTK) static void btmtk_usb_wmt_recv(struct urb *urb) { struct hci_dev *hdev = urb->context; @@ -1262,7 +1263,8 @@ int btmtk_usb_suspend(struct hci_dev *hdev) struct btmtk_data *btmtk_data = hci_get_priv(hdev); /* Stop urb anchor for iso data transmission */ - usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); + if (test_bit(BTMTK_ISOPKT_RUNNING, &btmtk_data->flags)) + usb_kill_anchored_urbs(&btmtk_data->isopkt_anchor); return 0; } @@ -1487,6 +1489,7 @@ int btmtk_usb_shutdown(struct hci_dev *hdev) return 0; } EXPORT_SYMBOL_GPL(btmtk_usb_shutdown); +#endif MODULE_AUTHOR("Sean Wang "); MODULE_AUTHOR("Mark Chen "); diff --git a/drivers/bluetooth/btmtksdio.c b/drivers/bluetooth/btmtksdio.c index 39d6898497a40431171b58ba6d22a102f993ff60..497e4c87f5be56d5919f74ba4a53b20a079e0a54 100644 --- a/drivers/bluetooth/btmtksdio.c +++ b/drivers/bluetooth/btmtksdio.c @@ -1148,9 +1148,6 @@ static int btmtksdio_setup(struct hci_dev *hdev) } } - /* Valid LE States quirk for MediaTek 7921 */ - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - break; case 0x7663: case 0x7668: diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index f2f37143c454d5c18adeba6087d97d32bea4905e..fd7991ea76726ea473906270060166cc17be66cb 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -1287,7 +1287,6 @@ void btrtl_set_quirks(struct hci_dev *hdev, struct btrtl_device_info *btrtl_dev) case CHIP_ID_8852C: case CHIP_ID_8851B: case CHIP_ID_8852BT: - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); /* RTL8852C needs to transmit mSBC data continuously without diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index acdba5d77694f811be2b6c9e108ab4cf20594cf2..51d9d4532dda4ee1087879c8ee20ccf8c05b7ed3 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -3956,8 +3956,8 @@ static int btusb_probe(struct usb_interface *intf, if (id->driver_info & BTUSB_WIDEBAND_SPEECH) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (id->driver_info & BTUSB_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(id->driver_info & BTUSB_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); if (id->driver_info & BTUSB_DIGIANSWER) { data->cmdreq_type = USB_TYPE_VENDOR; diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index ca64666769027acba1ec52bf02dbdecb487c9bdc..4b1ad7ea5b95a93ad2d48e4e1a4882651da8312c 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -2160,7 +2160,7 @@ static void qca_power_shutdown(struct hci_uart *hu) qcadev = serdev_device_get_drvdata(hu->serdev); power = qcadev->bt_power; - if (power->pwrseq) { + if (power && power->pwrseq) { pwrseq_power_off(power->pwrseq); set_bit(QCA_BT_OFF, &qca->flags); return; @@ -2187,10 +2187,6 @@ static void qca_power_shutdown(struct hci_uart *hu) } break; - case QCA_QCA6390: - pwrseq_power_off(qcadev->bt_power->pwrseq); - break; - default: gpiod_set_value_cansleep(qcadev->bt_en, 0); } @@ -2416,11 +2412,14 @@ static int qca_serdev_probe(struct serdev_device *serdev) break; case QCA_QCA6390: - qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, - "bluetooth"); - if (IS_ERR(qcadev->bt_power->pwrseq)) - return PTR_ERR(qcadev->bt_power->pwrseq); - break; + if (dev_of_node(&serdev->dev)) { + qcadev->bt_power->pwrseq = devm_pwrseq_get(&serdev->dev, + "bluetooth"); + if (IS_ERR(qcadev->bt_power->pwrseq)) + return PTR_ERR(qcadev->bt_power->pwrseq); + break; + } + fallthrough; default: qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable", @@ -2475,8 +2474,8 @@ static int qca_serdev_probe(struct serdev_device *serdev) set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks); - if (data->capabilities & QCA_CAP_VALID_LE_STATES) - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); + if (!(data->capabilities & QCA_CAP_VALID_LE_STATES)) + set_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks); } return 0; diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index c4046f8f1985ab21ee7266818b3cda4e0a32f630..43e9ac5a3324e2d42c17100d1d6d7ab8f2adb607 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -425,8 +425,6 @@ static int __vhci_create_device(struct vhci_data *data, __u8 opcode) if (opcode & 0x80) set_bit(HCI_QUIRK_RAW_DEVICE, &hdev->quirks); - set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks); - if (hci_register_dev(hdev) < 0) { BT_ERR("Can't register HCI device"); hci_free_dev(hdev); diff --git a/drivers/cache/Kconfig b/drivers/cache/Kconfig index 94abd8f632a7b3eb02b7e387402cee431009ee8a..db51386c663a6640dad549ce427c02c19d0bd62e 100644 --- a/drivers/cache/Kconfig +++ b/drivers/cache/Kconfig @@ -18,6 +18,7 @@ config STARFIVE_STARLINK_CACHE bool "StarFive StarLink Cache controller" depends on RISCV depends on ARCH_STARFIVE + depends on 64BIT select RISCV_DMA_NONCOHERENT select RISCV_NONSTANDARD_CACHE_OPS help diff --git a/drivers/char/ds1620.c b/drivers/char/ds1620.c index cf89a9631107dbd54561c222003033d49c3bd41c..a4f4291b4492cbc3f4b2fb80ef3b4b3b93bf554b 100644 --- a/drivers/char/ds1620.c +++ b/drivers/char/ds1620.c @@ -421,4 +421,5 @@ static void __exit ds1620_exit(void) module_init(ds1620_init); module_exit(ds1620_exit); +MODULE_DESCRIPTION("Dallas Semiconductor DS1620 thermometer driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/char/nwbutton.c b/drivers/char/nwbutton.c index ea378c0ed549868e50050abc50ef2dac7be20d4f..92cee5717237b77899e92cf4bd6de834d98a590b 100644 --- a/drivers/char/nwbutton.c +++ b/drivers/char/nwbutton.c @@ -241,6 +241,7 @@ static void __exit nwbutton_exit (void) MODULE_AUTHOR("Alex Holden"); +MODULE_DESCRIPTION("NetWinder button driver"); MODULE_LICENSE("GPL"); module_init(nwbutton_init); diff --git a/drivers/char/nwflash.c b/drivers/char/nwflash.c index 0973c2c2b01a48aa9bf4cd1084951950318fe5c2..9f52f0306ef71f691e451bcf63bbb2b5794950d0 100644 --- a/drivers/char/nwflash.c +++ b/drivers/char/nwflash.c @@ -618,6 +618,7 @@ static void __exit nwflash_exit(void) iounmap((void *)FLASH_BASE); } +MODULE_DESCRIPTION("NetWinder flash memory driver"); MODULE_LICENSE("GPL"); module_param(flashdebug, bool, 0644); diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c index 5a5afa14ca8cb8267e27a26d226fb7d1c3515c11..45771b1a3716a2ae3fc3c0d558d757841ff0df39 100644 --- a/drivers/char/xillybus/xillyusb.c +++ b/drivers/char/xillybus/xillyusb.c @@ -50,6 +50,7 @@ MODULE_LICENSE("GPL v2"); static const char xillyname[] = "xillyusb"; static unsigned int fifo_buf_order; +static struct workqueue_struct *wakeup_wq; #define USB_VENDOR_ID_XILINX 0x03fd #define USB_VENDOR_ID_ALTERA 0x09fb @@ -569,10 +570,6 @@ static void cleanup_dev(struct kref *kref) * errors if executed. The mechanism relies on that xdev->error is assigned * a non-zero value by report_io_error() prior to queueing wakeup_all(), * which prevents bulk_in_work() from calling process_bulk_in(). - * - * The fact that wakeup_all() and bulk_in_work() are queued on the same - * workqueue makes their concurrent execution very unlikely, however the - * kernel's API doesn't seem to ensure this strictly. */ static void wakeup_all(struct work_struct *work) @@ -627,7 +624,7 @@ static void report_io_error(struct xillyusb_dev *xdev, if (do_once) { kref_get(&xdev->kref); /* xdev is used by work item */ - queue_work(xdev->workq, &xdev->wakeup_workitem); + queue_work(wakeup_wq, &xdev->wakeup_workitem); } } @@ -1906,6 +1903,13 @@ static const struct file_operations xillyusb_fops = { static int xillyusb_setup_base_eps(struct xillyusb_dev *xdev) { + struct usb_device *udev = xdev->udev; + + /* Verify that device has the two fundamental bulk in/out endpoints */ + if (usb_pipe_type_check(udev, usb_sndbulkpipe(udev, MSG_EP_NUM)) || + usb_pipe_type_check(udev, usb_rcvbulkpipe(udev, IN_EP_NUM))) + return -ENODEV; + xdev->msg_ep = endpoint_alloc(xdev, MSG_EP_NUM | USB_DIR_OUT, bulk_out_work, 1, 2); if (!xdev->msg_ep) @@ -1935,14 +1939,15 @@ static int setup_channels(struct xillyusb_dev *xdev, __le16 *chandesc, int num_channels) { - struct xillyusb_channel *chan; + struct usb_device *udev = xdev->udev; + struct xillyusb_channel *chan, *new_channels; int i; chan = kcalloc(num_channels, sizeof(*chan), GFP_KERNEL); if (!chan) return -ENOMEM; - xdev->channels = chan; + new_channels = chan; for (i = 0; i < num_channels; i++, chan++) { unsigned int in_desc = le16_to_cpu(*chandesc++); @@ -1971,6 +1976,15 @@ static int setup_channels(struct xillyusb_dev *xdev, */ if ((out_desc & 0x80) && i < 14) { /* Entry is valid */ + if (usb_pipe_type_check(udev, + usb_sndbulkpipe(udev, i + 2))) { + dev_err(xdev->dev, + "Missing BULK OUT endpoint %d\n", + i + 2); + kfree(new_channels); + return -ENODEV; + } + chan->writable = 1; chan->out_synchronous = !!(out_desc & 0x40); chan->out_seekable = !!(out_desc & 0x20); @@ -1980,6 +1994,7 @@ static int setup_channels(struct xillyusb_dev *xdev, } } + xdev->channels = new_channels; return 0; } @@ -2096,9 +2111,11 @@ static int xillyusb_discovery(struct usb_interface *interface) * just after responding with the IDT, there is no reason for any * work item to be running now. To be sure that xdev->channels * is updated on anything that might run in parallel, flush the - * workqueue, which rarely does anything. + * device's workqueue and the wakeup work item. This rarely + * does anything. */ flush_workqueue(xdev->workq); + flush_work(&xdev->wakeup_workitem); xdev->num_channels = num_channels; @@ -2258,6 +2275,10 @@ static int __init xillyusb_init(void) { int rc = 0; + wakeup_wq = alloc_workqueue(xillyname, 0, 0); + if (!wakeup_wq) + return -ENOMEM; + if (LOG2_INITIAL_FIFO_BUF_SIZE > PAGE_SHIFT) fifo_buf_order = LOG2_INITIAL_FIFO_BUF_SIZE - PAGE_SHIFT; else @@ -2265,12 +2286,17 @@ static int __init xillyusb_init(void) rc = usb_register(&xillyusb_driver); + if (rc) + destroy_workqueue(wakeup_wq); + return rc; } static void __exit xillyusb_exit(void) { usb_deregister(&xillyusb_driver); + + destroy_workqueue(wakeup_wq); } module_init(xillyusb_init); diff --git a/drivers/clk/thead/clk-th1520-ap.c b/drivers/clk/thead/clk-th1520-ap.c index cbc176b27c091e3fa378fb8750771396fcd1a882..17e32ae08720cb96554cd0d2837dd7821b0a56fd 100644 --- a/drivers/clk/thead/clk-th1520-ap.c +++ b/drivers/clk/thead/clk-th1520-ap.c @@ -738,7 +738,7 @@ static struct ccu_div vp_axi_clk = { .hw.init = CLK_HW_INIT_PARENTS_HW("vp-axi", video_pll_clk_parent, &ccu_div_ops, - 0), + CLK_IGNORE_UNUSED), }, }; diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 392a8000b238b52fce006f9c325439be930dc147..c0278d023cfce595b7cd9f8bb72df60477c8792c 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -3405,6 +3405,7 @@ static const struct x86_cpu_id intel_epp_default[] = { */ X86_MATCH_VFM(INTEL_ALDERLAKE_L, HWP_SET_DEF_BALANCE_PERF_EPP(102)), X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), + X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, HWP_SET_DEF_BALANCE_PERF_EPP(32)), X86_MATCH_VFM(INTEL_METEORLAKE_L, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, 179, 64, 16)), X86_MATCH_VFM(INTEL_ARROWLAKE, HWP_SET_EPP_VALUES(HWP_EPP_POWERSAVE, diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c index a663e7566c480d8c684aa7681f3de4371cad0a85..51132a575b2766a6409124e894d340903bff64bb 100644 --- a/drivers/cxl/core/pci.c +++ b/drivers/cxl/core/pci.c @@ -834,11 +834,13 @@ static void cxl_disable_rch_root_ints(struct cxl_dport *dport) void cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) { struct device *dport_dev = dport->dport_dev; - struct pci_host_bridge *host_bridge; - host_bridge = to_pci_host_bridge(dport_dev); - if (host_bridge->native_aer) - dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + if (dport->rch) { + struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport_dev); + + if (host_bridge->native_aer) + dport->rcrb.aer_cap = cxl_rcrb_to_aer(dport_dev, dport->rcrb.base); + } dport->reg_map.host = host; cxl_dport_map_regs(dport); diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h index 11faf1db4fa4820a1a2759b6a32d39281eba0b3d..473421ba7a18a56a330d00979186666caaf0a0c0 100644 --- a/drivers/edac/skx_common.h +++ b/drivers/edac/skx_common.h @@ -45,7 +45,6 @@ #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index f23ba62ce127b475a29bd8ed8235fbbc69cd6cbc..ed4e8ddbe76a50e2266cfb9679e76a8169bad447 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -27,7 +27,8 @@ cflags-$(CONFIG_ARM64) += -fpie $(DISABLE_STACKLEAK_PLUGIN) \ cflags-$(CONFIG_ARM) += -DEFI_HAVE_STRLEN -DEFI_HAVE_STRNLEN \ -DEFI_HAVE_MEMCHR -DEFI_HAVE_STRRCHR \ -DEFI_HAVE_STRCMP -fno-builtin -fpic \ - $(call cc-option,-mno-single-pic-base) + $(call cc-option,-mno-single-pic-base) \ + $(DISABLE_STACKLEAK_PLUGIN) cflags-$(CONFIG_RISCV) += -fpic -DNO_ALTERNATIVE -mno-relax \ $(DISABLE_STACKLEAK_PLUGIN) cflags-$(CONFIG_LOONGARCH) += -fpie @@ -57,6 +58,10 @@ KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_CFI), $(KBUILD_CFLAGS)) # disable LTO KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS)) +# The .data section would be renamed to .data.efistub, therefore, remove +# `-fdata-sections` flag from KBUILD_CFLAGS_KERNEL +KBUILD_CFLAGS_KERNEL := $(filter-out -fdata-sections, $(KBUILD_CFLAGS_KERNEL)) + lib-y := efi-stub-helper.o gop.o secureboot.o tpm.o \ file.o mem.o random.o randomalloc.o pci.o \ skip_spaces.o lib-cmdline.o lib-ctype.o \ diff --git a/drivers/fsi/fsi-core.c b/drivers/fsi/fsi-core.c index 46ac5a8beab72a3e030077b2a521b856d1103fa0..e2e1e9df611543343a8498cdc425528a3e44619a 100644 --- a/drivers/fsi/fsi-core.c +++ b/drivers/fsi/fsi-core.c @@ -1444,5 +1444,6 @@ static void fsi_exit(void) } module_exit(fsi_exit); module_param(discard_errors, int, 0664); +MODULE_DESCRIPTION("FSI core driver"); MODULE_LICENSE("GPL"); MODULE_PARM_DESC(discard_errors, "Don't invoke error handling on bus accesses"); diff --git a/drivers/fsi/fsi-master-aspeed.c b/drivers/fsi/fsi-master-aspeed.c index b0b624c3717bac1b87aee7ea55333cc09be57390..6f5e1bdf7e40e420dbb8523914c5e45d9433dcda 100644 --- a/drivers/fsi/fsi-master-aspeed.c +++ b/drivers/fsi/fsi-master-aspeed.c @@ -670,4 +670,5 @@ static struct platform_driver fsi_master_aspeed_driver = { }; module_platform_driver(fsi_master_aspeed_driver); +MODULE_DESCRIPTION("FSI master driver for AST2600"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-ast-cf.c b/drivers/fsi/fsi-master-ast-cf.c index f8c776ce1b5609380bfbc072355ac482c397ba15..a4c37ff8edd69d8c85d431baa97d3205f408f5bd 100644 --- a/drivers/fsi/fsi-master-ast-cf.c +++ b/drivers/fsi/fsi-master-ast-cf.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0+ // Copyright 2018 IBM Corp /* - * A FSI master controller, using a simple GPIO bit-banging interface + * A FSI master based on Aspeed ColdFire coprocessor */ #include @@ -1438,5 +1438,6 @@ static struct platform_driver fsi_master_acf = { }; module_platform_driver(fsi_master_acf); +MODULE_DESCRIPTION("A FSI master based on Aspeed ColdFire coprocessor"); MODULE_LICENSE("GPL"); MODULE_FIRMWARE(FW_FILE_NAME); diff --git a/drivers/fsi/fsi-master-gpio.c b/drivers/fsi/fsi-master-gpio.c index 10fc344b6b221eaeda9be97b7a336efdcc244913..f761344f487329bfe9795cf2bab051ecbc52703a 100644 --- a/drivers/fsi/fsi-master-gpio.c +++ b/drivers/fsi/fsi-master-gpio.c @@ -892,4 +892,5 @@ static struct platform_driver fsi_master_gpio_driver = { }; module_platform_driver(fsi_master_gpio_driver); +MODULE_DESCRIPTION("A FSI master controller, using a simple GPIO bit-banging interface"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-master-hub.c b/drivers/fsi/fsi-master-hub.c index 6d8b6e8854e563e6c560ae6ec0a7a80c1c3d99bc..6568fed7db3cf531fe423a449aa7a5196537c473 100644 --- a/drivers/fsi/fsi-master-hub.c +++ b/drivers/fsi/fsi-master-hub.c @@ -295,4 +295,5 @@ static struct fsi_driver hub_master_driver = { }; module_fsi_driver(hub_master_driver); +MODULE_DESCRIPTION("FSI hub master driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/fsi/fsi-scom.c b/drivers/fsi/fsi-scom.c index 61dbda9dbe2b0de3c844a768b22736aa557df8f3..411ddc018cd86a99a75b784f28ec04df42833a60 100644 --- a/drivers/fsi/fsi-scom.c +++ b/drivers/fsi/fsi-scom.c @@ -625,4 +625,5 @@ static void scom_exit(void) module_init(scom_init); module_exit(scom_exit); +MODULE_DESCRIPTION("SCOM FSI Client device driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/gpio-mlxbf3.c b/drivers/gpio/gpio-mlxbf3.c index d5906d419b0ab996a5286d8cc411929385bf2c4b..10ea71273c891521ac4e4f210d3441faa77e3309 100644 --- a/drivers/gpio/gpio-mlxbf3.c +++ b/drivers/gpio/gpio-mlxbf3.c @@ -39,6 +39,8 @@ #define MLXBF_GPIO_CAUSE_OR_EVTEN0 0x14 #define MLXBF_GPIO_CAUSE_OR_CLRCAUSE 0x18 +#define MLXBF_GPIO_CLR_ALL_INTS GENMASK(31, 0) + struct mlxbf3_gpio_context { struct gpio_chip gc; @@ -82,6 +84,8 @@ static void mlxbf3_gpio_irq_disable(struct irq_data *irqd) val = readl(gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); val &= ~BIT(offset); writel(val, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + + writel(BIT(offset), gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); raw_spin_unlock_irqrestore(&gs->gc.bgpio_lock, flags); gpiochip_disable_irq(gc, offset); @@ -253,6 +257,15 @@ static int mlxbf3_gpio_probe(struct platform_device *pdev) return 0; } +static void mlxbf3_gpio_shutdown(struct platform_device *pdev) +{ + struct mlxbf3_gpio_context *gs = platform_get_drvdata(pdev); + + /* Disable and clear all interrupts */ + writel(0, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_EVTEN0); + writel(MLXBF_GPIO_CLR_ALL_INTS, gs->gpio_cause_io + MLXBF_GPIO_CAUSE_OR_CLRCAUSE); +} + static const struct acpi_device_id mlxbf3_gpio_acpi_match[] = { { "MLNXBF33", 0 }, {} @@ -265,6 +278,7 @@ static struct platform_driver mlxbf3_gpio_driver = { .acpi_match_table = mlxbf3_gpio_acpi_match, }, .probe = mlxbf3_gpio_probe, + .shutdown = mlxbf3_gpio_shutdown, }; module_platform_driver(mlxbf3_gpio_driver); diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index fd0749c0c630f654b482d4565f8219ee07114730..6b2c6b91f96250c7cacda4d6bfdf0df1f33c787a 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -268,6 +268,7 @@ config DRM_EXEC config DRM_GPUVM tristate depends on DRM + select DRM_EXEC help GPU-VM representation providing helpers to manage a GPUs virtual address space diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 916b6b8cf7d9a639021f1f2703be6434b60fa4f9..6dfdff58bffd1fb4819e11f6291a63a9b20af6d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1057,6 +1057,9 @@ static int amdgpu_cs_patch_ibs(struct amdgpu_cs_parser *p, r = amdgpu_ring_parse_cs(ring, p, job, ib); if (r) return r; + + if (ib->sa_bo) + ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); } else { ib->ptr = (uint32_t *)kptr; r = amdgpu_ring_patch_cs_in_place(ring, p, job, ib); @@ -1778,7 +1781,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, struct ttm_operation_ctx ctx = { false, false }; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_bo_va_mapping *mapping; - int r; + int i, r; addr /= AMDGPU_GPU_PAGE_SIZE; @@ -1793,13 +1796,13 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (dma_resv_locking_ctx((*bo)->tbo.base.resv) != &parser->exec.ticket) return -EINVAL; - if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); - r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); - if (r) - return r; - } + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains); + for (i = 0; i < (*bo)->placement.num_placement; i++) + (*bo)->placements[i].flags |= TTM_PL_FLAG_CONTIGUOUS; + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx); + if (r) + return r; return amdgpu_ttm_alloc_gart(&(*bo)->tbo); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 5cb33ac99f7089fb81c4954d8cbb6c434c713be6..c43d1b6e5d66b283056d672f17cec756ee265ca1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -685,16 +685,24 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_free(fpriv, id); break; case AMDGPU_CTX_OP_QUERY_STATE: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_QUERY_STATE2: + if (args->in.flags) + return -EINVAL; r = amdgpu_ctx_query2(adev, fpriv, id, &args->out); break; case AMDGPU_CTX_OP_GET_STABLE_PSTATE: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index ac108fca64fe62a8d7a9429a253ef0b0055fdd25..7b561e8e3cafc004b6ac808e09950494373f8a6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -278,7 +278,7 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, msg = RREG32(mmMP0_SMN_C2PMSG_33); if (msg & 0x80000000) break; - usleep_range(1000, 1100); + msleep(1); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 82452606ae6ca8d6b82abdcdb2368c675aa84918..c770cb201e64bc82a7337eb57437b744d02f4ea3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -509,6 +509,16 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + j = i + xcc_id * adev->gfx.num_compute_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.compute_ring[j], + RESET_QUEUES, 0, 0); + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -551,6 +561,18 @@ int amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) int i, r = 0; int j; + if (adev->enable_mes) { + if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { + for (i = 0; i < adev->gfx.num_gfx_rings; i++) { + j = i + xcc_id * adev->gfx.num_gfx_rings; + amdgpu_mes_unmap_legacy_queue(adev, + &adev->gfx.gfx_ring[j], + PREEMPT_QUEUES, 0, 0); + } + } + return 0; + } + if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; @@ -995,7 +1017,7 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg, uint32_t xcc_ if (amdgpu_device_skip_hw_access(adev)) return 0; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) return amdgpu_mes_rreg(adev, reg); BUG_ON(!ring->funcs->emit_rreg); @@ -1065,7 +1087,7 @@ void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, uint3 if (amdgpu_device_skip_hw_access(adev)) return; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_wreg(adev, reg, v); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c0265902565670e98d28b8f015380840b06eb703..b49b3650fd62173b15c03d38889e63949b5b5bf5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -589,7 +589,8 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) ring = adev->rings[i]; vmhub = ring->vm_hub; - if (ring == &adev->mes.ring || + if (ring == &adev->mes.ring[0] || + ring == &adev->mes.ring[1] || ring == &adev->umsch_mm.ring) continue; @@ -761,7 +762,7 @@ void amdgpu_gmc_fw_reg_write_reg_wait(struct amdgpu_device *adev, unsigned long flags; uint32_t seq; - if (adev->mes.ring.sched.ready) { + if (adev->mes.ring[0].sched.ready) { amdgpu_mes_reg_write_reg_wait(adev, reg0, reg1, ref, mask); return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index febca3130497ea23485f4ad48abf6843ceda8946..4d951a1baefab7bcd347e1b6a14c07454d513843 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -156,6 +156,8 @@ struct amdgpu_gmc_funcs { uint64_t addr, uint64_t *flags); /* get the amount of memory used by the vbios for pre-OS console */ unsigned int (*get_vbios_fb_size)(struct amdgpu_device *adev); + /* get the DCC buffer alignment */ + unsigned int (*get_dcc_alignment)(struct amdgpu_device *adev); enum amdgpu_memory_partition (*query_mem_partition_mode)( struct amdgpu_device *adev); @@ -363,6 +365,10 @@ struct amdgpu_gmc { (adev)->gmc.gmc_funcs->override_vm_pte_flags \ ((adev), (vm), (addr), (pte_flags)) #define amdgpu_gmc_get_vbios_fb_size(adev) (adev)->gmc.gmc_funcs->get_vbios_fb_size((adev)) +#define amdgpu_gmc_get_dcc_alignment(adev) ({ \ + typeof(adev) _adev = (adev); \ + _adev->gmc.gmc_funcs->get_dcc_alignment(_adev); \ +}) /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index e238f2832f65b4daa95fb06f9141c5a05bbe5b77..908e134551523366c4adc914cfa192232f5c0036 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -264,9 +264,8 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, struct dma_fence *fence = NULL; int r; - /* Ignore soft recovered fences here */ r = drm_sched_entity_error(s_entity); - if (r && r != -ENODATA) + if (r) goto error; if (!fence && job->gang_submit) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index e499d6ba306b739b9189abb79b9286ede5f5932b..1cb1ec7beefed3fb22ddae4c8b5edf30d8699667 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -103,7 +103,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) if (!amdgpu_mes_log_enable) return 0; - r = amdgpu_bo_create_kernel(adev, AMDGPU_MES_LOG_BUFFER_SIZE, PAGE_SIZE, + r = amdgpu_bo_create_kernel(adev, adev->mes.event_log_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, &adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, @@ -113,7 +113,7 @@ static int amdgpu_mes_event_log_init(struct amdgpu_device *adev) return r; } - memset(adev->mes.event_log_cpu_addr, 0, PAGE_SIZE); + memset(adev->mes.event_log_cpu_addr, 0, adev->mes.event_log_size); return 0; @@ -135,9 +135,11 @@ int amdgpu_mes_init(struct amdgpu_device *adev) idr_init(&adev->mes.queue_id_idr); ida_init(&adev->mes.doorbell_ida); spin_lock_init(&adev->mes.queue_id_lock); - spin_lock_init(&adev->mes.ring_lock); mutex_init(&adev->mes.mutex_hidden); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) + spin_lock_init(&adev->mes.ring_lock[i]); + adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK; adev->mes.vmid_mask_mmhub = 0xffffff00; adev->mes.vmid_mask_gfxhub = 0xffffff00; @@ -163,36 +165,38 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.sdma_hqd_mask[i] = 0xfc; } - r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs); - if (r) { - dev_err(adev->dev, - "(%d) ring trail_fence_offs wb alloc failed\n", r); - goto error_ids; - } - adev->mes.sch_ctx_gpu_addr = - adev->wb.gpu_addr + (adev->mes.sch_ctx_offs * 4); - adev->mes.sch_ctx_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs]; + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) ring trail_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.sch_ctx_gpu_addr[i] = + adev->wb.gpu_addr + (adev->mes.sch_ctx_offs[i] * 4); + adev->mes.sch_ctx_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.sch_ctx_offs[i]]; - r = amdgpu_device_wb_get(adev, &adev->mes.query_status_fence_offs); - if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - dev_err(adev->dev, - "(%d) query_status_fence_offs wb alloc failed\n", r); - goto error_ids; + r = amdgpu_device_wb_get(adev, + &adev->mes.query_status_fence_offs[i]); + if (r) { + dev_err(adev->dev, + "(%d) query_status_fence_offs wb alloc failed\n", + r); + goto error; + } + adev->mes.query_status_fence_gpu_addr[i] = adev->wb.gpu_addr + + (adev->mes.query_status_fence_offs[i] * 4); + adev->mes.query_status_fence_ptr[i] = + (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs[i]]; } - adev->mes.query_status_fence_gpu_addr = - adev->wb.gpu_addr + (adev->mes.query_status_fence_offs * 4); - adev->mes.query_status_fence_ptr = - (uint64_t *)&adev->wb.wb[adev->mes.query_status_fence_offs]; r = amdgpu_device_wb_get(adev, &adev->mes.read_val_offs); if (r) { - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); dev_err(adev->dev, "(%d) read_val_offs alloc failed\n", r); - goto error_ids; + goto error; } adev->mes.read_val_gpu_addr = adev->wb.gpu_addr + (adev->mes.read_val_offs * 4); @@ -212,10 +216,16 @@ int amdgpu_mes_init(struct amdgpu_device *adev) error_doorbell: amdgpu_mes_doorbell_free(adev); error: - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); -error_ids: + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + idr_destroy(&adev->mes.pasid_idr); idr_destroy(&adev->mes.gang_id_idr); idr_destroy(&adev->mes.queue_id_idr); @@ -226,13 +236,22 @@ error_ids: void amdgpu_mes_fini(struct amdgpu_device *adev) { + int i; + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { + if (adev->mes.sch_ctx_ptr[i]) + amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs[i]); + if (adev->mes.query_status_fence_ptr[i]) + amdgpu_device_wb_free(adev, + adev->mes.query_status_fence_offs[i]); + } + if (adev->mes.read_val_ptr) + amdgpu_device_wb_free(adev, adev->mes.read_val_offs); + amdgpu_mes_doorbell_free(adev); idr_destroy(&adev->mes.pasid_idr); @@ -1499,7 +1518,7 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - if (adev->enable_uni_mes && pipe == AMDGPU_MES_SCHED_PIPE) { + if (adev->enable_uni_mes) { snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_uni_mes.bin", ucode_prefix); } else if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && @@ -1573,7 +1592,7 @@ static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) uint32_t *mem = (uint32_t *)(adev->mes.event_log_cpu_addr); seq_hex_dump(m, "", DUMP_PREFIX_OFFSET, 32, 4, - mem, AMDGPU_MES_LOG_BUFFER_SIZE, false); + mem, adev->mes.event_log_size, false); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index e11051271f71db9a4d07aeb7f26ec03c174bfcc0..0bc837dab578f96798787a535ec75446496a6ff4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -52,7 +52,6 @@ enum amdgpu_mes_priority_level { #define AMDGPU_MES_PROC_CTX_SIZE 0x1000 /* one page area */ #define AMDGPU_MES_GANG_CTX_SIZE 0x1000 /* one page area */ -#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 /* Maximu log buffer size for MES */ struct amdgpu_mes_funcs; @@ -83,8 +82,8 @@ struct amdgpu_mes { uint64_t default_process_quantum; uint64_t default_gang_quantum; - struct amdgpu_ring ring; - spinlock_t ring_lock; + struct amdgpu_ring ring[AMDGPU_MAX_MES_PIPES]; + spinlock_t ring_lock[AMDGPU_MAX_MES_PIPES]; const struct firmware *fw[AMDGPU_MAX_MES_PIPES]; @@ -113,12 +112,12 @@ struct amdgpu_mes { uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES]; uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES]; uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS]; - uint32_t sch_ctx_offs; - uint64_t sch_ctx_gpu_addr; - uint64_t *sch_ctx_ptr; - uint32_t query_status_fence_offs; - uint64_t query_status_fence_gpu_addr; - uint64_t *query_status_fence_ptr; + uint32_t sch_ctx_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t sch_ctx_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *sch_ctx_ptr[AMDGPU_MAX_MES_PIPES]; + uint32_t query_status_fence_offs[AMDGPU_MAX_MES_PIPES]; + uint64_t query_status_fence_gpu_addr[AMDGPU_MAX_MES_PIPES]; + uint64_t *query_status_fence_ptr[AMDGPU_MAX_MES_PIPES]; uint32_t read_val_offs; uint64_t read_val_gpu_addr; uint32_t *read_val_ptr; @@ -135,8 +134,9 @@ struct amdgpu_mes { unsigned long *doorbell_bitmap; /* MES event log buffer */ - struct amdgpu_bo *event_log_gpu_obj; - uint64_t event_log_gpu_addr; + uint32_t event_log_size; + struct amdgpu_bo *event_log_gpu_obj; + uint64_t event_log_gpu_addr; void *event_log_cpu_addr; /* ip specific functions */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0c856005df6b95642af7b7ae4bdbf6a33a86f7ee..38face981c3e38779f4f93db6a0cdf739e6d60a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -166,6 +166,9 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t if (ret) return -EFAULT; + if (ta_bin_len > PSP_1_MEG) + return -EINVAL; + copy_pos += sizeof(uint32_t); ta_bin = kzalloc(ta_bin_len, GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index ad49cecb20b8b2525bb7a8922a5db76213285604..e6344a6b0a9f6af7e0df6fd0d7d012f693db5104 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -212,6 +212,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) sched_hw_submission = max(sched_hw_submission, 256); + if (ring->funcs->type == AMDGPU_RING_TYPE_MES) + sched_hw_submission = 8; else if (ring == &adev->sdma.instance[0].page) sched_hw_submission = 256; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1a5439abd1a043d68c378c3fbb41c9e167c4c60f..c87d68d4be536539c8bcca899cbc03058bd19c7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -461,8 +461,11 @@ struct amdgpu_vcn5_fw_shared { struct amdgpu_fw_shared_unified_queue_struct sq; uint8_t pad1[8]; struct amdgpu_fw_shared_fw_logging fw_log; + uint8_t pad2[20]; struct amdgpu_fw_shared_rb_setup rb_setup; - uint8_t pad2[4]; + struct amdgpu_fw_shared_smu_interface_info smu_dpm_interface; + struct amdgpu_fw_shared_drm_key_wa drm_key_wa; + uint8_t pad3[9]; }; #define VCN_BLOCK_ENCODE_DISABLE_MASK 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 111c380f929b597d65f32bc3f3a0cc62973a8dac..b287a82e6177e9b3614595ada9eba8f721fc1363 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -858,7 +858,7 @@ void amdgpu_virt_post_reset(struct amdgpu_device *adev) adev->gfx.is_poweron = false; } - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f91cc149d06c8802e6cac7ba9dd348c1e7349491..7d26a962f811cf0bb2b2fe2ff28a5a56f0dccd81 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -456,6 +456,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, u64 vis_usage = 0, max_bytes, min_block_size; struct amdgpu_vram_mgr_resource *vres; u64 size, remaining_size, lpfn, fpfn; + unsigned int adjust_dcc_size = 0; struct drm_buddy *mm = &mgr->mm; struct drm_buddy_block *block; unsigned long pages_per_block; @@ -511,7 +512,19 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, /* Allocate blocks in desired range */ vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; + if (bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC && + adev->gmc.gmc_funcs->get_dcc_alignment) + adjust_dcc_size = amdgpu_gmc_get_dcc_alignment(adev); + remaining_size = (u64)vres->base.size; + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + unsigned int dcc_size; + + dcc_size = roundup_pow_of_two(vres->base.size + adjust_dcc_size); + remaining_size = (u64)dcc_size; + + vres->flags |= DRM_BUDDY_TRIM_DISABLE; + } mutex_lock(&mgr->lock); while (remaining_size) { @@ -521,8 +534,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, min_block_size = mgr->default_page_size; size = remaining_size; - if ((size >= (u64)pages_per_block << PAGE_SHIFT) && - !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) + + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) + min_block_size = size; + else if ((size >= (u64)pages_per_block << PAGE_SHIFT) && + !(size & (((u64)pages_per_block << PAGE_SHIFT) - 1))) min_block_size = (u64)pages_per_block << PAGE_SHIFT; BUG_ON(min_block_size < mm->chunk_size); @@ -553,6 +569,22 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, } mutex_unlock(&mgr->lock); + if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS && adjust_dcc_size) { + struct drm_buddy_block *dcc_block; + unsigned long dcc_start; + u64 trim_start; + + dcc_block = amdgpu_vram_mgr_first_block(&vres->blocks); + /* Adjust the start address for DCC buffers only */ + dcc_start = + roundup((unsigned long)amdgpu_vram_mgr_block_start(dcc_block), + adjust_dcc_size); + trim_start = (u64)dcc_start; + drm_buddy_block_trim(mm, &trim_start, + (u64)vres->base.size, + &vres->blocks); + } + vres->base.start = 0; size = max_t(u64, amdgpu_vram_mgr_blocks_size(&vres->blocks), vres->base.size); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 2957702fca0c668801d05c79e6a3d9b00be5ce8e..e444e621ddaa01d185549bf9a0d1f311fad1b209 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4116,6 +4116,7 @@ static void gfx_v10_0_check_gfxoff_flag(struct amdgpu_device *adev) static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) { + char fw_name[53]; char ucode_prefix[30]; const char *wks = ""; int err; @@ -4149,8 +4150,8 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_CE); if (!amdgpu_sriov_vf(adev)) { - err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, - "amdgpu/%s_rlc.bin", ucode_prefix); + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", ucode_prefix); + err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index f384be0d1800c2ba90f9a3dcd5adec27ffd28124..2c611b8577a7ec17cd9f908ce31456e290481787 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -202,6 +202,12 @@ static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) }; +static const struct soc15_reg_golden golden_settings_gc_12_0[] = { + SOC15_REG_GOLDEN_VALUE(GC, 0, regDB_MEM_CONFIG, 0x0000000f, 0x0000000f), + SOC15_REG_GOLDEN_VALUE(GC, 0, regCB_HW_CONTROL_1, 0x03000000, 0x03000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, regGL2C_CTRL5, 0x00000070, 0x00000020) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -3432,6 +3438,24 @@ static void gfx_v12_0_disable_gpa_mode(struct amdgpu_device *adev) WREG32_SOC15(GC, 0, regCPG_PSP_DEBUG, data); } +static void gfx_v12_0_init_golden_registers(struct amdgpu_device *adev) +{ + if (amdgpu_sriov_vf(adev)) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->rev_id == 0) + soc15_program_register_sequence(adev, + golden_settings_gc_12_0, + (const u32)ARRAY_SIZE(golden_settings_gc_12_0)); + break; + default: + break; + } +} + static int gfx_v12_0_hw_init(void *handle) { int r; @@ -3472,6 +3496,9 @@ static int gfx_v12_0_hw_init(void *handle) } } + if (!amdgpu_emu_mode) + gfx_v12_0_init_golden_registers(adev); + adev->gfx.is_poweron = true; if (get_gb_addr_config(adev)) @@ -3519,33 +3546,9 @@ static int gfx_v12_0_hw_init(void *handle) return r; } -static int gfx_v12_0_kiq_disable_kgq(struct amdgpu_device *adev) -{ - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - int i, r = 0; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size * - adev->gfx.num_gfx_rings)) - return -ENOMEM; - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.gfx_ring[i], - PREEMPT_QUEUES, 0, 0); - - if (adev->gfx.kiq[0].ring.sched.ready) - r = amdgpu_ring_test_helper(kiq_ring); - - return r; -} - static int gfx_v12_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; uint32_t tmp; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); @@ -3553,8 +3556,7 @@ static int gfx_v12_0_hw_fini(void *handle) if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { - r = gfx_v12_0_kiq_disable_kgq(adev); - if (r) + if (amdgpu_gfx_disable_kgq(adev, 0)) DRM_ERROR("KGQ disable failed\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index b88a6fa173b3628904ea1f8778bb9b0b18589b76..2797fd84432b22d6ded588a047f40219187a282a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -231,7 +231,7 @@ static void gmc_v11_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { amdgpu_gmc_fw_reg_write_reg_wait(adev, req, ack, inv_req, 1 << vmid, GET_INST(GC, 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index fd3ac483760e6d85e87aa4d1c121b358819d24b5..edcb5351f8cca7ce8f302cacd04189c12bf1cf1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -299,7 +299,7 @@ static void gmc_v12_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, /* This is necessary for SRIOV as well as for GFXOFF to function * properly under bare metal */ - if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring.sched.ready) && + if ((adev->gfx.kiq[0].ring.sched.ready || adev->mes.ring[0].sched.ready) && (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev))) { struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; const unsigned eng = 17; @@ -542,6 +542,23 @@ static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) return 0; } +static unsigned int gmc_v12_0_get_dcc_alignment(struct amdgpu_device *adev) +{ + unsigned int max_tex_channel_caches, alignment; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(12, 0, 1)) + return 0; + + max_tex_channel_caches = adev->gfx.config.max_texture_channel_caches; + if (is_power_of_2(max_tex_channel_caches)) + alignment = (unsigned int)(max_tex_channel_caches / SZ_4); + else + alignment = roundup_pow_of_two(max_tex_channel_caches); + + return (unsigned int)(alignment * max_tex_channel_caches * SZ_1K); +} + static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .flush_gpu_tlb = gmc_v12_0_flush_gpu_tlb, .flush_gpu_tlb_pasid = gmc_v12_0_flush_gpu_tlb_pasid, @@ -551,6 +568,7 @@ static const struct amdgpu_gmc_funcs gmc_v12_0_gmc_funcs = { .get_vm_pde = gmc_v12_0_get_vm_pde, .get_vm_pte = gmc_v12_0_get_vm_pte, .get_vbios_fb_size = gmc_v12_0_get_vbios_fb_size, + .get_dcc_alignment = gmc_v12_0_get_dcc_alignment, }; static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 99adf3625657e9b7362708e56c27fc5d57d5a07d..98aa3ccd0d20244cc95a61cc0b854d7d12d46f79 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -538,11 +538,11 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index ad524ddc9760a8203a1ae35a44cbe75967624683..6ae5a784e18746e207ba51c5da7cb402ded49b1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -23,6 +23,7 @@ #include "amdgpu.h" #include "amdgpu_jpeg.h" +#include "amdgpu_cs.h" #include "soc15.h" #include "soc15d.h" #include "jpeg_v4_0_3.h" @@ -782,11 +783,15 @@ void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + + if (ring->funcs->parse_cs) + amdgpu_ring_write(ring, 0); + else + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JPEG_VMID_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); - amdgpu_ring_write(ring, (vmid | (vmid << 4))); + amdgpu_ring_write(ring, (vmid | (vmid << 4) | (vmid << 8))); amdgpu_ring_write(ring, PACKETJ(regUVD_LMI_JRBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET, 0, 0, PACKETJ_TYPE0)); @@ -1084,6 +1089,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .get_rptr = jpeg_v4_0_3_dec_ring_get_rptr, .get_wptr = jpeg_v4_0_3_dec_ring_get_wptr, .set_wptr = jpeg_v4_0_3_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + @@ -1248,3 +1254,56 @@ static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev) { adev->jpeg.ras = &jpeg_v4_0_3_ras; } + +/** + * jpeg_v4_0_3_dec_ring_parse_cs - command submission parser + * + * @parser: Command submission parser context + * @job: the job to parse + * @ib: the IB to parse + * + * Parse the command stream, return -EINVAL for invalid packet, + * 0 otherwise + */ +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib) +{ + uint32_t i, reg, res, cond, type; + struct amdgpu_device *adev = parser->adev; + + for (i = 0; i < ib->length_dw ; i += 2) { + reg = CP_PACKETJ_GET_REG(ib->ptr[i]); + res = CP_PACKETJ_GET_RES(ib->ptr[i]); + cond = CP_PACKETJ_GET_COND(ib->ptr[i]); + type = CP_PACKETJ_GET_TYPE(ib->ptr[i]); + + if (res) /* only support 0 at the moment */ + return -EINVAL; + + switch (type) { + case PACKETJ_TYPE0: + if (cond != PACKETJ_CONDITION_CHECK0 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE3: + if (cond != PACKETJ_CONDITION_CHECK3 || reg < JPEG_REG_RANGE_START || reg > JPEG_REG_RANGE_END) { + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + } + break; + case PACKETJ_TYPE6: + if (ib->ptr[i] == CP_PACKETJ_NOP) + continue; + dev_err(adev->dev, "Invalid packet [0x%08x]!\n", ib->ptr[i]); + return -EINVAL; + default: + dev_err(adev->dev, "Unknown packet type %d !\n", type); + return -EINVAL; + } + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f68564c381ecca77c13bfa08a5eb31f7c..71c54b294e157e060e54307bcfbda336d93a484b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -46,6 +46,9 @@ #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000 +#define JPEG_REG_RANGE_START 0x4000 +#define JPEG_REG_RANGE_END 0x41c2 + extern const struct amdgpu_ip_block_version jpeg_v4_0_3_ip_block; void jpeg_v4_0_3_dec_ring_emit_ib(struct amdgpu_ring *ring, @@ -62,5 +65,7 @@ void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void jpeg_v4_0_3_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, uint32_t val, uint32_t mask); - +int jpeg_v4_0_3_dec_ring_parse_cs(struct amdgpu_cs_parser *parser, + struct amdgpu_job *job, + struct amdgpu_ib *ib); #endif /* __JPEG_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index d694a276498a1ea4e82c52d53ede1b5cc036a102..f4daff90c7709daa16b5b5ece2adcf56892e0a3c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -646,6 +646,7 @@ static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { .get_rptr = jpeg_v5_0_0_dec_ring_get_rptr, .get_wptr = jpeg_v5_0_0_dec_ring_get_wptr, .set_wptr = jpeg_v5_0_0_dec_ring_set_wptr, + .parse_cs = jpeg_v4_0_3_dec_ring_parse_cs, .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 8ce51b9236c1ae50331768745223786077b8b1d2..2ea8223eb969aeb8a58affc8711875cfa48ad53d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -162,13 +162,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[0]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -191,11 +191,18 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(&mes->ring_lock[0], flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -208,14 +215,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); op_str = mes_v11_0_get_op_string(x_pkt); misc_op_str = mes_v11_0_get_misc_op_string(x_pkt); @@ -229,7 +235,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) @@ -252,8 +258,12 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(&mes->ring_lock[0], flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -512,9 +522,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr[0]; mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; + mes->query_status_fence_gpu_addr[0]; for (i = 0; i < MAX_COMPUTE_PIPES; i++) mes_set_hw_res_pkt.compute_hqd_mask[i] = @@ -1015,7 +1025,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); return amdgpu_ring_test_helper(kiq_ring); } @@ -1029,7 +1039,7 @@ static int mes_v11_0_queue_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1071,7 +1081,7 @@ static int mes_v11_0_ring_init(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; ring->funcs = &mes_v11_0_ring_funcs; @@ -1124,7 +1134,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, if (pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; + ring = &adev->mes.ring[0]; else BUG(); @@ -1163,6 +1173,8 @@ static int mes_v11_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v11_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; @@ -1198,9 +1210,6 @@ static int mes_v11_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1214,12 +1223,12 @@ static int mes_v11_0_sw_fini(void *handle) &adev->gfx.kiq[0].ring.mqd_gpu_addr, &adev->gfx.kiq[0].ring.mqd_ptr); - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + amdgpu_bo_free_kernel(&adev->mes.ring[0].mqd_obj, + &adev->mes.ring[0].mqd_gpu_addr, + &adev->mes.ring[0].mqd_ptr); amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + amdgpu_ring_fini(&adev->mes.ring[0]); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v11_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1330,9 +1339,9 @@ failure: static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v11_0_kiq_dequeue(&adev->mes.ring); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + mes_v11_0_kiq_dequeue(&adev->mes.ring[0]); + adev->mes.ring[0].sched.ready = false; } if (amdgpu_sriov_vf(adev)) { @@ -1350,7 +1359,7 @@ static int mes_v11_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; if (!adev->enable_mes_kiq) { @@ -1395,7 +1404,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index c9f74231ad595158bf94b8761b95d9e148ed4535..e39a58d262c94e2392bd25922cd0a8866e9d51a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -142,19 +142,20 @@ static const char *mes_v12_0_get_misc_op_string(union MESAPI__MISC *x_pkt) } static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, - void *pkt, int size, - int api_status_off) + int pipe, void *pkt, int size, + int api_status_off) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; signed long timeout = 3000000; /* 3000 ms */ struct amdgpu_device *adev = mes->adev; - struct amdgpu_ring *ring = &mes->ring; + struct amdgpu_ring *ring = &mes->ring[pipe]; + spinlock_t *ring_lock = &mes->ring_lock[pipe]; struct MES_API_STATUS *api_status; union MESAPI__MISC *x_pkt = pkt; const char *op_str, *misc_op_str; unsigned long flags; u64 status_gpu_addr; - u32 status_offset; + u32 seq, status_offset; u64 *status_ptr; signed long r; int ret; @@ -177,11 +178,18 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, status_ptr = (u64 *)&adev->wb.wb[status_offset]; *status_ptr = 0; - spin_lock_irqsave(&mes->ring_lock, flags); + spin_lock_irqsave(ring_lock, flags); r = amdgpu_ring_alloc(ring, (size + sizeof(mes_status_pkt)) / 4); if (r) goto error_unlock_free; + seq = ++ring->fence_drv.sync_seq; + r = amdgpu_fence_wait_polling(ring, + seq - ring->fence_drv.num_fences_mask, + timeout); + if (r < 1) + goto error_undo; + api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off); api_status->api_completion_fence_addr = status_gpu_addr; api_status->api_completion_fence_value = 1; @@ -194,39 +202,39 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_status_pkt.api_status.api_completion_fence_addr = ring->fence_drv.gpu_addr; - mes_status_pkt.api_status.api_completion_fence_value = - ++ring->fence_drv.sync_seq; + mes_status_pkt.api_status.api_completion_fence_value = seq; amdgpu_ring_write_multiple(ring, &mes_status_pkt, sizeof(mes_status_pkt) / 4); amdgpu_ring_commit(ring); - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); op_str = mes_v12_0_get_op_string(x_pkt); misc_op_str = mes_v12_0_get_misc_op_string(x_pkt); if (misc_op_str) - dev_dbg(adev->dev, "MES msg=%s (%s) was emitted\n", op_str, - misc_op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s (%s) was emitted\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_dbg(adev->dev, "MES msg=%s was emitted\n", op_str); + dev_dbg(adev->dev, "MES(%d) msg=%s was emitted\n", + pipe, op_str); else - dev_dbg(adev->dev, "MES msg=%d was emitted\n", - x_pkt->header.opcode); + dev_dbg(adev->dev, "MES(%d) msg=%d was emitted\n", + pipe, x_pkt->header.opcode); - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq, timeout); + r = amdgpu_fence_wait_polling(ring, seq, timeout); if (r < 1 || !*status_ptr) { if (misc_op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s (%s)\n", - op_str, misc_op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s (%s)\n", + pipe, op_str, misc_op_str); else if (op_str) - dev_err(adev->dev, "MES failed to respond to msg=%s\n", - op_str); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%s\n", + pipe, op_str); else - dev_err(adev->dev, "MES failed to respond to msg=%d\n", - x_pkt->header.opcode); + dev_err(adev->dev, "MES(%d) failed to respond to msg=%d\n", + pipe, x_pkt->header.opcode); while (halt_if_hws_hang) schedule(); @@ -238,8 +246,12 @@ static int mes_v12_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes, amdgpu_device_wb_free(adev, status_offset); return 0; +error_undo: + dev_err(adev->dev, "MES ring buffer is full.\n"); + amdgpu_ring_undo(ring); + error_unlock_free: - spin_unlock_irqrestore(&mes->ring_lock, flags); + spin_unlock_irqrestore(ring_lock, flags); error_wb_free: amdgpu_device_wb_free(adev, status_offset); @@ -254,6 +266,8 @@ static int convert_to_mes_queue_type(int queue_type) return MES_QUEUE_TYPE_COMPUTE; else if (queue_type == AMDGPU_RING_TYPE_SDMA) return MES_QUEUE_TYPE_SDMA; + else if (queue_type == AMDGPU_RING_TYPE_MES) + return MES_QUEUE_TYPE_SCHQ; else BUG(); return -1; @@ -311,6 +325,7 @@ static int mes_v12_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.gds_size = input->queue_size; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -330,6 +345,7 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr; return mes_v12_0_submit_pkt_and_poll_completion(mes, + AMDGPU_MES_SCHED_PIPE, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -338,6 +354,7 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, struct mes_map_legacy_queue_input *input) { union MESAPI__ADD_QUEUE mes_add_queue_pkt; + int pipe; memset(&mes_add_queue_pkt, 0, sizeof(mes_add_queue_pkt)); @@ -354,7 +371,12 @@ static int mes_v12_0_map_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); mes_add_queue_pkt.map_legacy_kq = 1; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_add_queue_pkt, sizeof(mes_add_queue_pkt), offsetof(union MESAPI__ADD_QUEUE, api_status)); } @@ -363,6 +385,7 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, struct mes_unmap_legacy_queue_input *input) { union MESAPI__REMOVE_QUEUE mes_remove_queue_pkt; + int pipe; memset(&mes_remove_queue_pkt, 0, sizeof(mes_remove_queue_pkt)); @@ -387,7 +410,12 @@ static int mes_v12_0_unmap_legacy_queue(struct amdgpu_mes *mes, convert_to_mes_queue_type(input->queue_type); } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt), offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } @@ -404,7 +432,7 @@ static int mes_v12_0_resume_gang(struct amdgpu_mes *mes, return 0; } -static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) +static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes, int pipe) { union MESAPI__QUERY_MES_STATUS mes_status_pkt; @@ -414,7 +442,7 @@ static int mes_v12_0_query_sched_status(struct amdgpu_mes *mes) mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS; mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_status_pkt, sizeof(mes_status_pkt), offsetof(union MESAPI__QUERY_MES_STATUS, api_status)); } @@ -423,6 +451,7 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, struct mes_misc_op_input *input) { union MESAPI__MISC misc_pkt; + int pipe; memset(&misc_pkt, 0, sizeof(misc_pkt)); @@ -475,12 +504,17 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, return -EINVAL; } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + if (mes->adev->enable_uni_mes) + pipe = AMDGPU_MES_KIQ_PIPE; + else + pipe = AMDGPU_MES_SCHED_PIPE; + + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &misc_pkt, sizeof(misc_pkt), offsetof(union MESAPI__MISC, api_status)); } -static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) { union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_1_pkt; @@ -491,12 +525,12 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 100; - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); } -static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) +static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) { int i; struct amdgpu_device *adev = mes->adev; @@ -508,27 +542,33 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC; mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; - mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; - mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; - mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; - mes_set_hw_res_pkt.paging_vmid = 0; - mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = mes->sch_ctx_gpu_addr; - mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = - mes->query_status_fence_gpu_addr; - - for (i = 0; i < MAX_COMPUTE_PIPES; i++) - mes_set_hw_res_pkt.compute_hqd_mask[i] = - mes->compute_hqd_mask[i]; - - for (i = 0; i < MAX_GFX_PIPES; i++) - mes_set_hw_res_pkt.gfx_hqd_mask[i] = mes->gfx_hqd_mask[i]; - - for (i = 0; i < MAX_SDMA_PIPES; i++) - mes_set_hw_res_pkt.sdma_hqd_mask[i] = mes->sdma_hqd_mask[i]; + if (pipe == AMDGPU_MES_SCHED_PIPE) { + mes_set_hw_res_pkt.vmid_mask_mmhub = mes->vmid_mask_mmhub; + mes_set_hw_res_pkt.vmid_mask_gfxhub = mes->vmid_mask_gfxhub; + mes_set_hw_res_pkt.gds_size = adev->gds.gds_size; + mes_set_hw_res_pkt.paging_vmid = 0; + + for (i = 0; i < MAX_COMPUTE_PIPES; i++) + mes_set_hw_res_pkt.compute_hqd_mask[i] = + mes->compute_hqd_mask[i]; + + for (i = 0; i < MAX_GFX_PIPES; i++) + mes_set_hw_res_pkt.gfx_hqd_mask[i] = + mes->gfx_hqd_mask[i]; + + for (i = 0; i < MAX_SDMA_PIPES; i++) + mes_set_hw_res_pkt.sdma_hqd_mask[i] = + mes->sdma_hqd_mask[i]; + + for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) + mes_set_hw_res_pkt.aggregated_doorbells[i] = + mes->aggregated_doorbells[i]; + } - for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++) - mes_set_hw_res_pkt.aggregated_doorbells[i] = - mes->aggregated_doorbells[i]; + mes_set_hw_res_pkt.g_sch_ctx_gpu_mc_ptr = + mes->sch_ctx_gpu_addr[pipe]; + mes_set_hw_res_pkt.query_status_fence_gpu_mc_ptr = + mes->query_status_fence_gpu_addr[pipe]; for (i = 0; i < 5; i++) { mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i]; @@ -551,10 +591,12 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes) mes_set_hw_res_pkt.oversubscription_timer = 50; mes_set_hw_res_pkt.unmapped_doorbell_handling = 1; - mes_set_hw_res_pkt.enable_mes_event_int_logging = 0; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr; + } - return mes_v12_0_submit_pkt_and_poll_completion(mes, + return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); } @@ -732,16 +774,11 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) if (enable) { data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - soc21_grbm_select(adev, 3, pipe, 0, 0); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; @@ -755,8 +792,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) /* unhalt MES and activate pipe0 */ data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); if (amdgpu_emu_mode) @@ -772,8 +808,7 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) data = REG_SET_FIELD(data, CP_MES_CNTL, MES_INVALIDATE_ICACHE, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, - (!adev->enable_uni_mes && adev->enable_mes_kiq) ? 1 : 0); + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); data = REG_SET_FIELD(data, CP_MES_CNTL, MES_HALT, 1); WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); } @@ -788,10 +823,6 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if ((!adev->enable_mes_kiq || adev->enable_uni_mes) && - pipe == AMDGPU_MES_KIQ_PIPE) - continue; - /* me=3, queue=0 */ soc21_grbm_select(adev, 3, pipe, 0, 0); @@ -1083,7 +1114,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) return r; } - kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring); + kiq->pmf->kiq_map_queues(kiq_ring, &adev->mes.ring[0]); r = amdgpu_ring_test_ring(kiq_ring); if (r) { @@ -1099,14 +1130,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, struct amdgpu_ring *ring; int r; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; - if ((pipe == AMDGPU_MES_SCHED_PIPE) && + if ((adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) && (amdgpu_in_reset(adev) || adev->in_suspend)) { *(ring->wptr_cpu_addr) = 0; *(ring->rptr_cpu_addr) = 0; @@ -1118,13 +1147,12 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return r; if (pipe == AMDGPU_MES_SCHED_PIPE) { - if (adev->enable_uni_mes) { - mes_v12_0_queue_init_register(ring); - } else { + if (adev->enable_uni_mes) + r = amdgpu_mes_map_legacy_queue(adev, ring); + else r = mes_v12_0_kiq_enable_queue(adev); - if (r) - return r; - } + if (r) + return r; } else { mes_v12_0_queue_init_register(ring); } @@ -1144,25 +1172,29 @@ static int mes_v12_0_queue_init(struct amdgpu_device *adev, return 0; } -static int mes_v12_0_ring_init(struct amdgpu_device *adev) +static int mes_v12_0_ring_init(struct amdgpu_device *adev, int pipe) { struct amdgpu_ring *ring; - ring = &adev->mes.ring; + ring = &adev->mes.ring[pipe]; ring->funcs = &mes_v12_0_ring_funcs; ring->me = 3; - ring->pipe = 0; + ring->pipe = pipe; ring->queue = 0; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; - ring->eop_gpu_addr = adev->mes.eop_gpu_addr[AMDGPU_MES_SCHED_PIPE]; + ring->eop_gpu_addr = adev->mes.eop_gpu_addr[pipe]; ring->no_scheduler = true; sprintf(ring->name, "mes_%d.%d.%d", ring->me, ring->pipe, ring->queue); + if (pipe == AMDGPU_MES_SCHED_PIPE) + ring->doorbell_index = adev->doorbell_index.mes_ring0 << 1; + else + ring->doorbell_index = adev->doorbell_index.mes_ring1 << 1; + return amdgpu_ring_init(adev, ring, 1024, NULL, 0, AMDGPU_RING_PRIO_DEFAULT, NULL); } @@ -1176,7 +1208,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) ring = &adev->gfx.kiq[0].ring; ring->me = 3; - ring->pipe = adev->enable_uni_mes ? 0 : 1; + ring->pipe = 1; ring->queue = 0; ring->adev = NULL; @@ -1198,12 +1230,10 @@ static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; - if (pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) ring = &adev->gfx.kiq[0].ring; - else if (pipe == AMDGPU_MES_SCHED_PIPE) - ring = &adev->mes.ring; else - BUG(); + ring = &adev->mes.ring[pipe]; if (ring->mqd_obj) return 0; @@ -1237,14 +1267,13 @@ static int mes_v12_0_sw_init(void *handle) adev->mes.kiq_hw_init = &mes_v12_0_kiq_hw_init; adev->mes.kiq_hw_fini = &mes_v12_0_kiq_hw_fini; + adev->mes.event_log_size = AMDGPU_MES_LOG_BUFFER_SIZE; + r = amdgpu_mes_init(adev); if (r) return r; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; - r = mes_v12_0_allocate_eop_buf(adev, pipe); if (r) return r; @@ -1252,18 +1281,15 @@ static int mes_v12_0_sw_init(void *handle) r = mes_v12_0_mqd_sw_init(adev, pipe); if (r) return r; - } - if (adev->enable_mes_kiq) { - r = mes_v12_0_kiq_ring_init(adev); + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + r = mes_v12_0_kiq_ring_init(adev); + else + r = mes_v12_0_ring_init(adev, pipe); if (r) return r; } - r = mes_v12_0_ring_init(adev); - if (r) - return r; - return 0; } @@ -1272,9 +1298,6 @@ static int mes_v12_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe; - amdgpu_device_wb_free(adev, adev->mes.sch_ctx_offs); - amdgpu_device_wb_free(adev, adev->mes.query_status_fence_offs); - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1282,18 +1305,21 @@ static int mes_v12_0_sw_fini(void *handle) &adev->mes.eop_gpu_addr[pipe], NULL); amdgpu_ucode_release(&adev->mes.fw[pipe]); - } - amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, - &adev->gfx.kiq[0].ring.mqd_gpu_addr, - &adev->gfx.kiq[0].ring.mqd_ptr); - - amdgpu_bo_free_kernel(&adev->mes.ring.mqd_obj, - &adev->mes.ring.mqd_gpu_addr, - &adev->mes.ring.mqd_ptr); + if (adev->enable_uni_mes || pipe == AMDGPU_MES_SCHED_PIPE) { + amdgpu_bo_free_kernel(&adev->mes.ring[pipe].mqd_obj, + &adev->mes.ring[pipe].mqd_gpu_addr, + &adev->mes.ring[pipe].mqd_ptr); + amdgpu_ring_fini(&adev->mes.ring[pipe]); + } + } - amdgpu_ring_fini(&adev->gfx.kiq[0].ring); - amdgpu_ring_fini(&adev->mes.ring); + if (!adev->enable_uni_mes) { + amdgpu_bo_free_kernel(&adev->gfx.kiq[0].ring.mqd_obj, + &adev->gfx.kiq[0].ring.mqd_gpu_addr, + &adev->gfx.kiq[0].ring.mqd_ptr); + amdgpu_ring_fini(&adev->gfx.kiq[0].ring); + } if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { mes_v12_0_free_ucode_buffers(adev, AMDGPU_MES_KIQ_PIPE); @@ -1337,7 +1363,7 @@ static void mes_v12_0_kiq_dequeue_sched(struct amdgpu_device *adev) soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - adev->mes.ring.sched.ready = false; + adev->mes.ring[0].sched.ready = false; } static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) @@ -1358,10 +1384,10 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) { int r = 0; - mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); - if (adev->enable_uni_mes) - return mes_v12_0_hw_init(adev); + mes_v12_0_kiq_setting(&adev->mes.ring[AMDGPU_MES_KIQ_PIPE]); + else + mes_v12_0_kiq_setting(&adev->gfx.kiq[0].ring); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { @@ -1388,6 +1414,14 @@ static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) if (r) goto failure; + if (adev->enable_uni_mes) { + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_KIQ_PIPE); + if (r) + goto failure; + + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_KIQ_PIPE); + } + r = mes_v12_0_hw_init(adev); if (r) goto failure; @@ -1401,9 +1435,15 @@ failure: static int mes_v12_0_kiq_hw_fini(struct amdgpu_device *adev) { - if (adev->mes.ring.sched.ready) { - mes_v12_0_kiq_dequeue_sched(adev); - adev->mes.ring.sched.ready = false; + if (adev->mes.ring[0].sched.ready) { + if (adev->enable_uni_mes) + amdgpu_mes_unmap_legacy_queue(adev, + &adev->mes.ring[AMDGPU_MES_SCHED_PIPE], + RESET_QUEUES, 0, 0); + else + mes_v12_0_kiq_dequeue_sched(adev); + + adev->mes.ring[0].sched.ready = false; } mes_v12_0_enable(adev, false); @@ -1416,10 +1456,10 @@ static int mes_v12_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (adev->mes.ring.sched.ready) + if (adev->mes.ring[0].sched.ready) goto out; - if (!adev->enable_mes_kiq || adev->enable_uni_mes) { + if (!adev->enable_mes_kiq) { if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { r = mes_v12_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, true); @@ -1439,23 +1479,23 @@ static int mes_v12_0_hw_init(void *handle) mes_v12_0_enable(adev, true); } + /* Enable the MES to handle doorbell ring on unmapped queue */ + mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); + r = mes_v12_0_queue_init(adev, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; - r = mes_v12_0_set_hw_resources(&adev->mes); + r = mes_v12_0_set_hw_resources(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) goto failure; if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); - /* Enable the MES to handle doorbell ring on unmapped queue */ - mes_v12_0_enable_unmapped_doorbell_handling(&adev->mes, true); - - r = mes_v12_0_query_sched_status(&adev->mes); + r = mes_v12_0_query_sched_status(&adev->mes, AMDGPU_MES_SCHED_PIPE); if (r) { DRM_ERROR("MES is busy\n"); goto failure; @@ -1468,7 +1508,7 @@ out: * with MES enabled. */ adev->gfx.kiq[0].ring.sched.ready = false; - adev->mes.ring.sched.ready = true; + adev->mes.ring[0].sched.ready = true; return 0; @@ -1511,17 +1551,7 @@ static int mes_v12_0_early_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int pipe, r; - if (adev->enable_uni_mes) { - r = amdgpu_mes_init_microcode(adev, AMDGPU_MES_SCHED_PIPE); - if (!r) - return 0; - - adev->enable_uni_mes = false; - } - for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { - if (!adev->enable_mes_kiq && pipe == AMDGPU_MES_KIQ_PIPE) - continue; r = amdgpu_mes_init_microcode(adev, pipe); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c index 5bbaa2b2caaba4d69c45dc4f3bf8da42c59e075f..0fbc3be81f140fcc81859767fc3ffe24646e144a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v4_1_0.c @@ -80,7 +80,8 @@ static uint32_t mmhub_v4_1_0_get_invalidate_req(unsigned int vmid, /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + /* Only use legacy inv on mmhub side */ + req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, MMVM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index af1e90159ce36c386f8cf4a77fcdc93c0c41c59b..2e72d445415f9eccf18c4fd2ef69d861fd35639d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -176,14 +176,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring) DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n", ring->doorbell_index, ring->wptr << 2); WDOORBELL64(ring->doorbell_index, ring->wptr << 2); - /* SDMA seems to miss doorbells sometimes when powergating kicks in. - * Updating the wptr directly will wake it. This is only safe because - * we disallow gfxoff in begin_use() and then allow it again in end_use(). - */ - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), - lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), - upper_32_bits(ring->wptr << 2)); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(5, 2, 1)) { + /* SDMA seems to miss doorbells sometimes when powergating kicks in. + * Updating the wptr directly will wake it. This is only safe because + * we disallow gfxoff in begin_use() and then allow it again in end_use(). + */ + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), + lower_32_bits(ring->wptr << 2)); + WREG32(sdma_v5_2_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), + upper_32_bits(ring->wptr << 2)); + } } else { DRM_DEBUG("Not using doorbell -- " "mmSDMA%i_GFX_RB_WPTR == 0x%08x " diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index 41b5e45697dcde43add1f99064e013e0ee153907..ecee9e7d7e4c6bd30beae52723bd1f881d287309 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -1575,8 +1575,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) | - SDMA_PKT_COPY_LINEAR_HEADER_CPV((copy_flags & - (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)) ? 1 : 0); + SDMA_PKT_COPY_LINEAR_HEADER_CPV(1); ib->ptr[ib->length_dw++] = byte_count - 1; ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ @@ -1590,6 +1589,8 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); + else + ib->ptr[ib->length_dw++] = 0; } /** @@ -1616,7 +1617,7 @@ static void sdma_v7_0_emit_fill_buffer(struct amdgpu_ib *ib, static const struct amdgpu_buffer_funcs sdma_v7_0_buffer_funcs = { .copy_max_bytes = 0x400000, - .copy_num_dw = 7, + .copy_num_dw = 8, .emit_copy_buffer = sdma_v7_0_emit_copy_buffer, .fill_max_bytes = 0x400000, .fill_num_dw = 5, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 2357ff39323f05a78078ada111deedc00cd8cf17..e74e1983da53aa41a5950d871e76fde495d60463 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -76,6 +76,12 @@ ((cond & 0xF) << 24) | \ ((type & 0xF) << 28)) +#define CP_PACKETJ_NOP 0x60000000 +#define CP_PACKETJ_GET_REG(x) ((x) & 0x3FFFF) +#define CP_PACKETJ_GET_RES(x) (((x) >> 18) & 0x3F) +#define CP_PACKETJ_GET_COND(x) (((x) >> 24) & 0xF) +#define CP_PACKETJ_GET_TYPE(x) (((x) >> 28) & 0xF) + /* Packet 3 types */ #define PACKET3_NOP 0x10 #define PACKET3_SET_BASE 0x11 diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index 7d641d0dadba4d0a3bd884414e3afa244ba6c1e2..b0c3678cfb31d80cf6f590037a1043a758e644d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -406,6 +406,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | AMD_PG_SUPPORT_JPEG | @@ -424,6 +425,7 @@ static int soc24_common_early_init(void *handle) AMD_CG_SUPPORT_ATHUB_MGCG | AMD_CG_SUPPORT_ATHUB_LS | AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_HDP_SD | AMD_CG_SUPPORT_MC_LS; adev->pg_flags = AMD_PG_SUPPORT_VCN | diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 7e7929f24ae44705c3b6e14b14e09ecfd48c268d..983a977632ff4e263b588dc5d140315f3e8e65fe 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2893,6 +2893,9 @@ static int dm_suspend(void *handle) hpd_rx_irq_work_suspend(dm); + if (adev->dm.dc->caps.ips_support) + dc_allow_idle_optimizations(adev->dm.dc, true); + dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D3); dc_dmub_srv_set_power_state(dm->dc->ctx->dmub_srv, DC_ACPI_CM_POWER_STATE_D3); diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 5442da90f508f673e162b5f7ca41c7ec9f04b472..2e9f6da1acdcad0e3236105afe0e5d78f64084c2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -804,12 +804,25 @@ struct dsc_mst_fairness_params { }; #if defined(CONFIG_DRM_AMD_DC_FP) -static int kbps_to_peak_pbn(int kbps) +static uint16_t get_fec_overhead_multiplier(struct dc_link *dc_link) +{ + u8 link_coding_cap; + uint16_t fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B; + + link_coding_cap = dc_link_dp_mst_decide_link_encoding_format(dc_link); + if (link_coding_cap == DP_128b_132b_ENCODING) + fec_overhead_multiplier_x1000 = PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B; + + return fec_overhead_multiplier_x1000; +} + +static int kbps_to_peak_pbn(int kbps, uint16_t fec_overhead_multiplier_x1000) { u64 peak_kbps = kbps; peak_kbps *= 1006; - peak_kbps = div_u64(peak_kbps, 1000); + peak_kbps *= fec_overhead_multiplier_x1000; + peak_kbps = div_u64(peak_kbps, 1000 * 1000); return (int) DIV64_U64_ROUND_UP(peak_kbps * 64, (54 * 8 * 1000)); } @@ -910,11 +923,12 @@ static int increase_dsc_bpp(struct drm_atomic_state *state, int link_timeslots_used; int fair_pbn_alloc; int ret = 0; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled) { initial_slack[i] = - kbps_to_peak_pbn(params[i].bw_range.max_kbps) - vars[i + k].pbn; + kbps_to_peak_pbn(params[i].bw_range.max_kbps, fec_overhead_multiplier_x1000) - vars[i + k].pbn; bpp_increased[i] = false; remaining_to_increase += 1; } else { @@ -1010,6 +1024,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, int next_index; int remaining_to_try = 0; int ret; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); for (i = 0; i < count; i++) { if (vars[i + k].dsc_enabled @@ -1039,7 +1054,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, if (next_index == -1) break; - vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1052,8 +1067,7 @@ static int try_disable_dsc(struct drm_atomic_state *state, vars[next_index].dsc_enabled = false; vars[next_index].bpp_x16 = 0; } else { - vars[next_index].pbn = kbps_to_peak_pbn( - params[next_index].bw_range.max_kbps); + vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps, fec_overhead_multiplier_x1000); ret = drm_dp_atomic_find_time_slots(state, params[next_index].port->mgr, params[next_index].port, @@ -1082,6 +1096,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, int count = 0; int i, k, ret; bool debugfs_overwrite = false; + uint16_t fec_overhead_multiplier_x1000 = get_fec_overhead_multiplier(dc_link); memset(params, 0, sizeof(params)); @@ -1146,7 +1161,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try no compression */ for (i = 0; i < count; i++) { vars[i + k].aconnector = params[i].aconnector; - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port, @@ -1165,7 +1180,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, /* Try max compression */ for (i = 0; i < count; i++) { if (params[i].compression_possible && params[i].clock_force_enable != DSC_CLK_FORCE_DISABLE) { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = true; vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1173,7 +1188,7 @@ static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state, if (ret < 0) return ret; } else { - vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps); + vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps, fec_overhead_multiplier_x1000); vars[i + k].dsc_enabled = false; vars[i + k].bpp_x16 = 0; ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, @@ -1270,6 +1285,9 @@ static bool is_dsc_need_re_compute( } } + if (new_stream_on_link_num == 0) + return false; + /* check current_state if there stream on link but it is not in * new request state */ diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h index fa84d34b737367a57eeb608f86a0e1d1ecad0b09..600d6e22101112b83beb50b554246877c8e99d66 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h @@ -46,6 +46,9 @@ #define SYNAPTICS_CASCADED_HUB_ID 0x5A #define IS_SYNAPTICS_CASCADED_PANAMERA(devName, data) ((IS_SYNAPTICS_PANAMERA(devName) && ((int)data[2] == SYNAPTICS_CASCADED_HUB_ID)) ? 1 : 0) +#define PBN_FEC_OVERHEAD_MULTIPLIER_8B_10B 1031 +#define PBN_FEC_OVERHEAD_MULTIPLIER_128B_132B 1000 + enum mst_msg_ready_type { NONE_MSG_RDY_EVENT = 0, DOWN_REP_MSG_RDY_EVENT = 1, diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c index be2638c763d7897031d651433d8c66890ffe2454..9a406d74c0dd7617948782ab12ef8de3e003ab7a 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c @@ -35,8 +35,10 @@ #include "dc_stream_priv.h" #define DC_LOGGER dc->ctx->logger +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(x, y) ((x > y) ? x : y) +#endif /******************************************************************************* * Private functions diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c index 2a21bcf5224f452b6bc8584a614844524df06f48..4d960dc5ce8935f1d12c9a8fc4f3e4045adadf42 100644 --- a/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c +++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_replay.c @@ -185,8 +185,7 @@ static bool dmub_replay_copy_settings(struct dmub_replay *dmub, else copy_settings_data->flags.bitfields.force_wakeup_by_tps3 = 0; - - dm_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); + dc_wake_and_execute_dmub_cmd(dc, &cmd, DM_DMUB_WAIT_TYPE_WAIT); return true; } diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index 3c0222aa4df1d86bc73f630679e55344e1add943..46f9c05de16e8c9035f9e26c0b5c481c274d52ef 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -83,6 +83,8 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/display_rq_dlg_calc_31.o := $(dml_rcfla CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_rq_dlg_calc_32.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags) +CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags) CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o := $(dml_rcflags) diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c index e06fc370267beaef2cabfa6972c12677be00617f..14a902ff3b8ac9fac75f1dae4b2cb47e053635cd 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn10/dcn10_hwseq.c @@ -1402,6 +1402,8 @@ void dcn10_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } @@ -3587,7 +3589,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = temp_x + viewport_width; + pos_cpy.x = 2 * viewport_width - temp_x; } } } else { @@ -3680,7 +3682,7 @@ void dcn10_set_cursor_position(struct pipe_ctx *pipe_ctx) (int)hubp->curs_attr.width || pos_cpy.x <= (int)hubp->curs_attr.width + pipe_ctx->plane_state->src_rect.x) { - pos_cpy.x = 2 * viewport_width - temp_x; + pos_cpy.x = temp_x + viewport_width; } } } else { diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c index e4f7078c10269ff9c6ed452dc1f3fdcd969ca775..f115c7a285e7766c36cf2efb56d242fa0b0d8013 100644 --- a/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn35/dcn35_hwseq.c @@ -771,6 +771,8 @@ void dcn35_init_pipes(struct dc *dc, struct dc_state *context) if (hubbub && hubp) { if (hubbub->funcs->program_det_size) hubbub->funcs->program_det_size(hubbub, hubp->inst, 0); + if (hubbub->funcs->program_det_segments) + hubbub->funcs->program_det_segments(hubbub, hubp->inst, 0); } } diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c index 9a3cc0514a36ed19b856ff4744ce4cade2b3ecbf..8e0588b1cf3054a60e6164a51cab3f1599e14bc7 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c @@ -1778,6 +1778,9 @@ static bool dcn321_resource_construct( dc->caps.color.mpc.ogam_rom_caps.hlg = 0; dc->caps.color.mpc.ocsc = 1; + /* Use pipe context based otg sync logic */ + dc->config.use_pipe_ctx_sync_logic = true; + dc->config.dc_mode_clk_limit_support = true; dc->config.enable_windowed_mpo_odm = true; /* read VBIOS LTTPR caps */ diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c index a05a2209a44e67dc3fea45a6d8e7e785cb379d0f..34b02147881ddb721751ef37eaadbd1117803d96 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c @@ -723,6 +723,7 @@ static const struct dc_debug_options debug_defaults_drv = { .min_prefetch_in_strobe_ns = 60000, // 60us .disable_unbounded_requesting = false, .enable_legacy_fast_update = false, + .dcc_meta_propagation_delay_us = 10, .fams2_config = { .bits = { .enable = true, diff --git a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h index 26efeada4f41fd7649fa7ad0311ec3ef9c5b6132..bb46f30d11d0a1ef5f38d8fba3c9d30cb46dfbc8 100644 --- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h +++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.h @@ -138,7 +138,9 @@ void dcn401_prepare_mcache_programming(struct dc *dc, struct dc_state *context); SRI_ARR(DCHUBP_MALL_CONFIG, HUBP, id), \ SRI_ARR(DCHUBP_VMPG_CONFIG, HUBP, id), \ SRI_ARR(UCLK_PSTATE_FORCE, HUBPREQ, id), \ - HUBP_3DLUT_FL_REG_LIST_DCN401(id) + HUBP_3DLUT_FL_REG_LIST_DCN401(id), \ + SRI_ARR(DCSURF_VIEWPORT_MCACHE_SPLIT_COORDINATE, HUBP, id), \ + SRI_ARR(DCHUBP_MCACHEID_CONFIG, HUBP, id) /* ABM */ #define ABM_DCN401_REG_LIST_RI(id) \ diff --git a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c index 7ecf76aea950514e12b4afd8599b147245d31285..6e064e6ae949f3979e06c33ff8597c27a2550422 100644 --- a/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c +++ b/drivers/gpu/drm/amd/display/modules/hdcp/hdcp_ddc.c @@ -25,7 +25,9 @@ #include "hdcp.h" +#ifndef MIN #define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif #define HDCP_I2C_ADDR 0x3a /* 0x74 >> 1*/ #define KSV_READ_SIZE 0xf /* 0x6803b - 0x6802c */ #define HDCP_MAX_AUX_TRANSACTION_SIZE 16 diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h index b72d5d3622512338797ed3e81dd3de7c196dcfed..21ceafce1f9b27c96486e14787505c64a4484f13 100644 --- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 1 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0x4000 + /* Driver submits one API(cmd) as a single Frame and this command size is same * for all API to ease the debugging and parsing of ring buffer. */ diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h index ffd67c6ed9b3702de090d0b9160ee8b70b665484..101e2fe962c6a6bc1b34023ffe6bb9d5d2308e1c 100644 --- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h +++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h @@ -28,6 +28,9 @@ #define MES_API_VERSION 0x14 +/* Maximum log buffer size for MES. Needs to be updated if MES expands MES_EVT_INTR_HIST_LOG_12 */ +#define AMDGPU_MES_LOG_BUFFER_SIZE 0xC000 + /* Driver submits one API(cmd) as a single Frame and this command size is same for all API * to ease the debugging and parsing of ring buffer. */ @@ -94,6 +97,7 @@ enum MES_QUEUE_TYPE { MES_QUEUE_TYPE_SDMA, MES_QUEUE_TYPE_MAX, + MES_QUEUE_TYPE_SCHQ = MES_QUEUE_TYPE_MAX, }; struct MES_API_STATUS { @@ -239,8 +243,12 @@ union MESAPI_SET_HW_RESOURCES { uint32_t send_write_data : 1; uint32_t os_tdr_timeout_override : 1; uint32_t use_rs64mem_for_proc_gang_ctx : 1; + uint32_t halt_on_misaligned_access : 1; + uint32_t use_add_queue_unmap_flag_addr : 1; + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; uint32_t unmapped_doorbell_handling: 2; - uint32_t reserved : 15; + uint32_t reserved : 11; }; uint32_t uint32_all; }; diff --git a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c index a1b8a82d77cf9a8f138f2ec98ff9efd9ab26332b..8b7d6ed7e2ed235af9dcff50f14bb2f3996803dc 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_dpm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_dpm.c @@ -618,7 +618,8 @@ int amdgpu_pm_load_smu_firmware(struct amdgpu_device *adev, uint32_t *smu_versio const struct amd_pm_funcs *pp_funcs = adev->powerplay.pp_funcs; int r = 0; - if (!pp_funcs || !pp_funcs->load_firmware || adev->flags & AMD_IS_APU) + if (!pp_funcs || !pp_funcs->load_firmware || + (is_support_sw_smu(adev) && (adev->flags & AMD_IS_APU))) return 0; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h index 6f54c410c2f9859392e78e15b1334d13b8713218..409aeec6baa92193db436878b1d0552b00c126b6 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/ppevvmath.h @@ -22,12 +22,18 @@ */ #include -#define SHIFT_AMOUNT 16 /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */ +enum ppevvmath_constants { + /* We multiply all original integers with 2^SHIFT_AMOUNT to get the fInt representation */ + SHIFT_AMOUNT = 16, -#define PRECISION 5 /* Change this value to change the number of decimal places in the final output - 5 is a good default */ + /* Change this value to change the number of decimal places in the final output - 5 is a good default */ + PRECISION = 5, -#define SHIFTED_2 (2 << SHIFT_AMOUNT) -#define MAX (1 << (SHIFT_AMOUNT - 1)) - 1 /* 32767 - Might change in the future */ + SHIFTED_2 = (2 << SHIFT_AMOUNT), + + /* 32767 - Might change in the future */ + MAX = (1 << (SHIFT_AMOUNT - 1)) - 1, +}; /* ------------------------------------------------------------------------------- * NEW TYPE - fINT diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h index 4a3fde89aed73185a17e427d4d859c63158946ef..75c921e87360eb397a0dc874660b4207873c90df 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h +++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v14_0_2_pptable.h @@ -27,7 +27,8 @@ #pragma pack(push, 1) -#define SMU_14_0_2_TABLE_FORMAT_REVISION 3 +#define SMU_14_0_2_TABLE_FORMAT_REVISION 23 +#define SMU_14_0_2_CUSTOM_TABLE_FORMAT_REVISION 1 // POWERPLAYTABLE::ulPlatformCaps #define SMU_14_0_2_PP_PLATFORM_CAP_POWERPLAY 0x1 // This cap indicates whether CCC need to show Powerplay page. @@ -43,6 +44,7 @@ #define SMU_14_0_2_PP_THERMALCONTROLLER_NONE 0 #define SMU_14_0_2_PP_OVERDRIVE_VERSION 0x1 // TODO: FIX OverDrive Version TBD +#define SMU_14_0_2_PP_CUSTOM_OVERDRIVE_VERSION 0x1 #define SMU_14_0_2_PP_POWERSAVINGCLOCK_VERSION 0x01 // Power Saving Clock Table Version 1.00 enum SMU_14_0_2_OD_SW_FEATURE_CAP @@ -107,6 +109,7 @@ enum SMU_14_0_2_PWRMODE_SETTING SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_BALANCE, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_TURBO, SMU_14_0_2_PMSETTING_ACOUSTIC_LIMIT_RPM_RAGE, + SMU_14_0_2_PMSETTING_COUNT }; #define SMU_14_0_2_MAX_PMSETTING 32 // Maximum Number of PowerMode Settings @@ -127,17 +130,24 @@ struct smu_14_0_2_overdrive_table int16_t pm_setting[SMU_14_0_2_MAX_PMSETTING]; // Optimized power mode feature settings }; +enum smu_14_0_3_pptable_source { + PPTABLE_SOURCE_IFWI = 0, + PPTABLE_SOURCE_DRIVER_HARDCODED = 1, + PPTABLE_SOURCE_PPGEN_REGISTRY = 2, + PPTABLE_SOURCE_MAX = PPTABLE_SOURCE_PPGEN_REGISTRY, +}; + struct smu_14_0_2_powerplay_table { struct atom_common_table_header header; // header.format_revision = 3 (HAS TO MATCH SMU_14_0_2_TABLE_FORMAT_REVISION), header.content_revision = ? structuresize is calculated by PPGen. uint8_t table_revision; // PPGen use only: table_revision = 3 - uint8_t padding; // Padding 1 byte to align table_size offset to 6 bytes (pmfw_start_offset, for PMFW to know the starting offset of PPTable_t). + uint8_t pptable_source; // PPGen UI dropdown box uint16_t pmfw_pptable_start_offset; // The start offset of the pmfw portion. i.e. start of PPTable_t (start of SkuTable_t) uint16_t pmfw_pptable_size; // The total size of pmfw_pptable, i.e PPTable_t. - uint16_t pmfw_pfe_table_start_offset; // The start offset of the PFE_Settings_t within pmfw_pptable. - uint16_t pmfw_pfe_table_size; // The size of PFE_Settings_t. - uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t within pmfw_pptable. - uint16_t pmfw_board_table_size; // The size of BoardTable_t. + uint16_t pmfw_sku_table_start_offset; // DO NOT CHANGE ORDER; The absolute start offset of the SkuTable_t (within smu_14_0_3_powerplay_table). + uint16_t pmfw_sku_table_size; // DO NOT CHANGE ORDER; The size of SkuTable_t. + uint16_t pmfw_board_table_start_offset; // The start offset of the BoardTable_t + uint16_t pmfw_board_table_size; // The size of BoardTable_t. uint16_t pmfw_custom_sku_table_start_offset; // The start offset of the CustomSkuTable_t within pmfw_pptable. uint16_t pmfw_custom_sku_table_size; // The size of the CustomSkuTable_t. uint32_t golden_pp_id; // PPGen use only: PP Table ID on the Golden Data Base @@ -159,6 +169,36 @@ struct smu_14_0_2_powerplay_table PPTable_t smc_pptable; // PPTable_t in driver_if.h -- as requested by PMFW, this offset should start at a 32-byte boundary, and the table_size above should remain at offset=6 bytes }; +enum SMU_14_0_2_CUSTOM_OD_SW_FEATURE_CAP { + SMU_14_0_2_CUSTOM_ODCAP_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODCAP_COUNT +}; + +enum SMU_14_0_2_CUSTOM_OD_FEATURE_SETTING_ID { + SMU_14_0_2_CUSTOM_ODSETTING_POWER_MODE = 0, + SMU_14_0_2_CUSTOM_ODSETTING_COUNT, +}; + +struct smu_14_0_2_custom_overdrive_table { + uint8_t revision; + uint8_t reserve[3]; + uint8_t cap[SMU_14_0_2_CUSTOM_ODCAP_COUNT]; + int32_t max[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int32_t min[SMU_14_0_2_CUSTOM_ODSETTING_COUNT]; + int16_t pm_setting[SMU_14_0_2_PMSETTING_COUNT]; +}; + +struct smu_14_0_3_custom_powerplay_table { + uint8_t custom_table_revision; + uint16_t custom_table_size; + uint16_t custom_sku_table_offset; + uint32_t custom_platform_caps; + uint16_t software_shutdown_temp; + struct smu_14_0_2_custom_overdrive_table custom_overdrive_table; + uint32_t reserve[8]; + CustomSkuTable_t custom_sku_table_pmfw; +}; + #pragma pack(pop) #endif diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c index 98ea58d792ca18c0efdbb4bad33367edc2454202..e1a27903c80a149a115cc3ae916cacb2593e3038 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c @@ -66,6 +66,7 @@ #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE 0x4000 #define DEBUGSMC_MSG_Mode1Reset 2 +#define LINK_SPEED_MAX 3 static struct cmn2asic_msg_mapping smu_v14_0_2_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(TestMessage, PPSMC_MSG_TestMessage, 1), @@ -221,7 +222,6 @@ static struct cmn2asic_mapping smu_v14_0_2_workload_map[PP_SMC_POWER_PROFILE_COU WORKLOAD_MAP(PP_SMC_POWER_PROFILE_WINDOW3D, WORKLOAD_PPLIB_WINDOW_3D_BIT), }; -#if 0 static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_PPT0_BIT] = (SMU_THROTTLER_PPT0_BIT), [THROTTLER_PPT1_BIT] = (SMU_THROTTLER_PPT1_BIT), @@ -241,7 +241,6 @@ static const uint8_t smu_v14_0_2_throttler_map[] = { [THROTTLER_GFX_APCC_PLUS_BIT] = (SMU_THROTTLER_APCC_BIT), [THROTTLER_FIT_BIT] = (SMU_THROTTLER_FIT_BIT), }; -#endif static int smu_v14_0_2_get_allowed_feature_mask(struct smu_context *smu, @@ -1869,6 +1868,88 @@ static ssize_t smu_v14_0_2_get_ecc_info(struct smu_context *smu, return ret; } +static ssize_t smu_v14_0_2_get_gpu_metrics(struct smu_context *smu, + void **table) +{ + struct smu_table_context *smu_table = &smu->smu_table; + struct gpu_metrics_v1_3 *gpu_metrics = + (struct gpu_metrics_v1_3 *)smu_table->gpu_metrics_table; + SmuMetricsExternal_t metrics_ext; + SmuMetrics_t *metrics = &metrics_ext.SmuMetrics; + int ret = 0; + + ret = smu_cmn_get_metrics_table(smu, + &metrics_ext, + true); + if (ret) + return ret; + + smu_cmn_init_soft_gpu_metrics(gpu_metrics, 1, 3); + + gpu_metrics->temperature_edge = metrics->AvgTemperature[TEMP_EDGE]; + gpu_metrics->temperature_hotspot = metrics->AvgTemperature[TEMP_HOTSPOT]; + gpu_metrics->temperature_mem = metrics->AvgTemperature[TEMP_MEM]; + gpu_metrics->temperature_vrgfx = metrics->AvgTemperature[TEMP_VR_GFX]; + gpu_metrics->temperature_vrsoc = metrics->AvgTemperature[TEMP_VR_SOC]; + gpu_metrics->temperature_vrmem = max(metrics->AvgTemperature[TEMP_VR_MEM0], + metrics->AvgTemperature[TEMP_VR_MEM1]); + + gpu_metrics->average_gfx_activity = metrics->AverageGfxActivity; + gpu_metrics->average_umc_activity = metrics->AverageUclkActivity; + gpu_metrics->average_mm_activity = max(metrics->Vcn0ActivityPercentage, + metrics->Vcn1ActivityPercentage); + + gpu_metrics->average_socket_power = metrics->AverageSocketPower; + gpu_metrics->energy_accumulator = metrics->EnergyAccumulator; + + if (metrics->AverageGfxActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPostDs; + else + gpu_metrics->average_gfxclk_frequency = metrics->AverageGfxclkFrequencyPreDs; + + if (metrics->AverageUclkActivity <= SMU_14_0_2_BUSY_THRESHOLD) + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPostDs; + else + gpu_metrics->average_uclk_frequency = metrics->AverageMemclkFrequencyPreDs; + + gpu_metrics->average_vclk0_frequency = metrics->AverageVclk0Frequency; + gpu_metrics->average_dclk0_frequency = metrics->AverageDclk0Frequency; + gpu_metrics->average_vclk1_frequency = metrics->AverageVclk1Frequency; + gpu_metrics->average_dclk1_frequency = metrics->AverageDclk1Frequency; + + gpu_metrics->current_gfxclk = gpu_metrics->average_gfxclk_frequency; + gpu_metrics->current_socclk = metrics->CurrClock[PPCLK_SOCCLK]; + gpu_metrics->current_uclk = metrics->CurrClock[PPCLK_UCLK]; + gpu_metrics->current_vclk0 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk0 = metrics->CurrClock[PPCLK_DCLK_0]; + gpu_metrics->current_vclk1 = metrics->CurrClock[PPCLK_VCLK_0]; + gpu_metrics->current_dclk1 = metrics->CurrClock[PPCLK_DCLK_0]; + + gpu_metrics->throttle_status = + smu_v14_0_2_get_throttler_status(metrics); + gpu_metrics->indep_throttle_status = + smu_cmn_get_indep_throttler_status(gpu_metrics->throttle_status, + smu_v14_0_2_throttler_map); + + gpu_metrics->current_fan_speed = metrics->AvgFanRpm; + + gpu_metrics->pcie_link_width = metrics->PcieWidth; + if ((metrics->PcieRate - 1) > LINK_SPEED_MAX) + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(1); + else + gpu_metrics->pcie_link_speed = pcie_gen_to_speed(metrics->PcieRate); + + gpu_metrics->system_clock_counter = ktime_get_boottime_ns(); + + gpu_metrics->voltage_gfx = metrics->AvgVoltage[SVI_PLANE_VDD_GFX]; + gpu_metrics->voltage_soc = metrics->AvgVoltage[SVI_PLANE_VDD_SOC]; + gpu_metrics->voltage_mem = metrics->AvgVoltage[SVI_PLANE_VDDIO_MEM]; + + *table = (void *)gpu_metrics; + + return sizeof(struct gpu_metrics_v1_3); +} + static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .get_allowed_feature_mask = smu_v14_0_2_get_allowed_feature_mask, .set_default_dpm_table = smu_v14_0_2_set_default_dpm_table, @@ -1905,6 +1986,7 @@ static const struct pptable_funcs smu_v14_0_2_ppt_funcs = { .enable_thermal_alert = smu_v14_0_enable_thermal_alert, .disable_thermal_alert = smu_v14_0_disable_thermal_alert, .notify_memory_pool_location = smu_v14_0_notify_memory_pool_location, + .get_gpu_metrics = smu_v14_0_2_get_gpu_metrics, .set_soft_freq_limited_range = smu_v14_0_set_soft_freq_limited_range, .init_pptable_microcode = smu_v14_0_init_pptable_microcode, .populate_umd_state_clk = smu_v14_0_2_populate_umd_state_clk, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c index 88eefef05faedce4af8b7b0e2d27a0ace00038ec..91ad434bcdaeb421734fb486f6a0fbf7dfa65c82 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c @@ -794,7 +794,7 @@ static const char *smu_get_feature_name(struct smu_context *smu, size_t smu_cmn_get_pp_feature_mask(struct smu_context *smu, char *buf) { - int8_t sort_feature[max(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)]; + int8_t sort_feature[MAX(SMU_FEATURE_COUNT, SMU_FEATURE_MAX)]; uint64_t feature_mask; int i, feature_index; uint32_t count = 0; diff --git a/drivers/gpu/drm/ast/ast_dp.c b/drivers/gpu/drm/ast/ast_dp.c index 1e9259416980ec49cce1b7fc080f562f002e29c5..e6c7f0d64e99592534d1736013647972b83e64b7 100644 --- a/drivers/gpu/drm/ast/ast_dp.c +++ b/drivers/gpu/drm/ast/ast_dp.c @@ -158,7 +158,14 @@ void ast_dp_launch(struct drm_device *dev) ASTDP_HOST_EDID_READ_DONE); } +bool ast_dp_power_is_on(struct ast_device *ast) +{ + u8 vgacre3; + + vgacre3 = ast_get_index_reg(ast, AST_IO_VGACRI, 0xe3); + return !(vgacre3 & AST_DP_PHY_SLEEP); +} void ast_dp_power_on_off(struct drm_device *dev, bool on) { diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c index aae019e79bda88197bc08bc998bce20ee116cac2..225817087b4d2f8459a03ef7f5416f870fc2b65f 100644 --- a/drivers/gpu/drm/ast/ast_drv.c +++ b/drivers/gpu/drm/ast/ast_drv.c @@ -391,6 +391,11 @@ static int ast_drm_freeze(struct drm_device *dev) static int ast_drm_thaw(struct drm_device *dev) { + struct ast_device *ast = to_ast_device(dev); + + ast_enable_vga(ast->ioregs); + ast_open_key(ast->ioregs); + ast_enable_mmio(dev->dev, ast->ioregs); ast_post_gpu(dev); return drm_mode_config_helper_resume(dev); diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h index ba3d86973995fd0444f1b9934e4cc26fd5e769fb..47bab5596c16ed24714bb000e4305d0564556745 100644 --- a/drivers/gpu/drm/ast/ast_drv.h +++ b/drivers/gpu/drm/ast/ast_drv.h @@ -472,6 +472,7 @@ void ast_init_3rdtx(struct drm_device *dev); bool ast_astdp_is_connected(struct ast_device *ast); int ast_astdp_read_edid(struct drm_device *dev, u8 *ediddata); void ast_dp_launch(struct drm_device *dev); +bool ast_dp_power_is_on(struct ast_device *ast); void ast_dp_power_on_off(struct drm_device *dev, bool no); void ast_dp_set_on_off(struct drm_device *dev, bool no); void ast_dp_set_mode(struct drm_crtc *crtc, struct ast_vbios_mode_info *vbios_mode); diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c index dc8f639e82fdf5dadfec93f4ad6faa53728d00be..049ee1477c337539caa1c54046be1bb62a8a455e 100644 --- a/drivers/gpu/drm/ast/ast_mode.c +++ b/drivers/gpu/drm/ast/ast_mode.c @@ -28,6 +28,7 @@ * Authors: Dave Airlie */ +#include #include #include @@ -1687,11 +1688,35 @@ static int ast_astdp_connector_helper_detect_ctx(struct drm_connector *connector struct drm_modeset_acquire_ctx *ctx, bool force) { + struct drm_device *dev = connector->dev; struct ast_device *ast = to_ast_device(connector->dev); + enum drm_connector_status status = connector_status_disconnected; + struct drm_connector_state *connector_state = connector->state; + bool is_active = false; + + mutex_lock(&ast->modeset_lock); + + if (connector_state && connector_state->crtc) { + struct drm_crtc_state *crtc_state = connector_state->crtc->state; + + if (crtc_state && crtc_state->active) + is_active = true; + } + + if (!is_active && !ast_dp_power_is_on(ast)) { + ast_dp_power_on_off(dev, true); + msleep(50); + } if (ast_astdp_is_connected(ast)) - return connector_status_connected; - return connector_status_disconnected; + status = connector_status_connected; + + if (!is_active && status == connector_status_disconnected) + ast_dp_power_on_off(dev, false); + + mutex_unlock(&ast->modeset_lock); + + return status; } static const struct drm_connector_helper_funcs ast_astdp_connector_helper_funcs = { diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c index 22bbb2d83e3073518505d91a6c5c6898150aa0e6..7936c20239551817bc8236ef6c1f65606084a39d 100644 --- a/drivers/gpu/drm/drm_atomic_uapi.c +++ b/drivers/gpu/drm/drm_atomic_uapi.c @@ -1070,21 +1070,17 @@ int drm_atomic_set_property(struct drm_atomic_state *state, break; } - if (async_flip && prop != config->prop_fb_id) { + if (async_flip && + (plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY || + (prop != config->prop_fb_id && + prop != config->prop_in_fence_fd && + prop != config->prop_fb_damage_clips))) { ret = drm_atomic_plane_get_property(plane, plane_state, prop, &old_val); ret = drm_atomic_check_prop_changes(ret, old_val, prop_value, prop); break; } - if (async_flip && plane_state->plane->type != DRM_PLANE_TYPE_PRIMARY) { - drm_dbg_atomic(prop->dev, - "[OBJECT:%d] Only primary planes can be changed during async flip\n", - obj->id); - ret = -EINVAL; - break; - } - ret = drm_atomic_plane_set_property(plane, plane_state, file_priv, prop, prop_value); diff --git a/drivers/gpu/drm/drm_bridge_connector.c b/drivers/gpu/drm/drm_bridge_connector.c index 0869b663f17e4d3179efe59b5fe200b033f8f7ac..a4fbf1eb7ac5a845588600d96ccc4ad645fa5a14 100644 --- a/drivers/gpu/drm/drm_bridge_connector.c +++ b/drivers/gpu/drm/drm_bridge_connector.c @@ -443,10 +443,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, panel_bridge = bridge; } - if (connector_type == DRM_MODE_CONNECTOR_Unknown) { - kfree(bridge_connector); + if (connector_type == DRM_MODE_CONNECTOR_Unknown) return ERR_PTR(-EINVAL); - } if (bridge_connector->bridge_hdmi) ret = drmm_connector_hdmi_init(drm, connector, @@ -461,10 +459,8 @@ struct drm_connector *drm_bridge_connector_init(struct drm_device *drm, ret = drmm_connector_init(drm, connector, &drm_bridge_connector_funcs, connector_type, ddc); - if (ret) { - kfree(bridge_connector); + if (ret) return ERR_PTR(ret); - } drm_connector_helper_add(connector, &drm_bridge_connector_helper_funcs); diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ecc19a232d489af91fc2464fff9975..103c185bb1c8a76ac0fc7a45ce2a5f34c683298a 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/drm_client.c b/drivers/gpu/drm/drm_client.c index 2803ac111bbd8ad5a069b7700832365175de76da..bfedcbf516dbe2b1efd037ed22efe4cfe139da48 100644 --- a/drivers/gpu/drm/drm_client.c +++ b/drivers/gpu/drm/drm_client.c @@ -355,7 +355,7 @@ int drm_client_buffer_vmap_local(struct drm_client_buffer *buffer, err_drm_gem_vmap_unlocked: drm_gem_unlock(gem); - return 0; + return ret; } EXPORT_SYMBOL(drm_client_buffer_vmap_local); diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c index 31af5cf37a099810f282e7c18ef3da940132105e..cee5eafbfb81a8bbc12cbdce0d6f29022f513e89 100644 --- a/drivers/gpu/drm/drm_client_modeset.c +++ b/drivers/gpu/drm/drm_client_modeset.c @@ -880,6 +880,11 @@ int drm_client_modeset_probe(struct drm_client_dev *client, unsigned int width, kfree(modeset->mode); modeset->mode = drm_mode_duplicate(dev, mode); + if (!modeset->mode) { + ret = -ENOMEM; + break; + } + drm_connector_get(connector); modeset->connectors[modeset->num_connectors++] = connector; modeset->x = offset->x; diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index 18565ec6845173ca7216ff5e37ea5063846274b3..56ac37ea2f27484348fe501eee767b6b68034866 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -624,6 +624,17 @@ static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y, u32 width, u32 height) { + /* + * This function may be invoked by panic() to flush the frame + * buffer, where all CPUs except the panic CPU are stopped. + * During the following schedule_work(), the panic CPU needs + * the worker_pool lock, which might be held by a stopped CPU, + * causing schedule_work() and panic() to block. Return early on + * oops_in_progress to prevent this blocking. + */ + if (oops_in_progress) + return; + drm_fb_helper_add_damage_clip(helper, x, y, width, height); schedule_work(&helper->damage_work); diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c index 3f84d7527793ef99db4d4e3d595c4564925f8eaa..0830cae9a4d0f5e6692258a8bec1dca033f323d7 100644 --- a/drivers/gpu/drm/drm_panel_orientation_quirks.c +++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c @@ -208,6 +208,18 @@ static const struct dmi_system_id orientation_data[] = { DMI_MATCH(DMI_BOARD_NAME, "KUN"), }, .driver_data = (void *)&lcd1600x2560_rightside_up, + }, { /* AYN Loki Max */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Max"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, + }, { /* AYN Loki Zero */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ayn"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Loki Zero"), + }, + .driver_data = (void *)&lcd1080x1920_leftside_up, }, { /* Chuwi HiBook (CWI514) */ .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"), @@ -414,6 +426,12 @@ static const struct dmi_system_id orientation_data[] = { DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ONE XPLAYER"), }, .driver_data = (void *)&lcd1600x2560_leftside_up, + }, { /* OrangePi Neo */ + .matches = { + DMI_EXACT_MATCH(DMI_SYS_VENDOR, "OrangePi"), + DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "NEO-01"), + }, + .driver_data = (void *)&lcd1200x1920_rightside_up, }, { /* Samsung GalaxyBook 10.6 */ .matches = { DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."), diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c index 071668bfe5d14e2952bf2d0af3761b82743ebe30..6c3333136737e7b2db71404e18242276399ae663 100644 --- a/drivers/gpu/drm/i915/display/intel_backlight.c +++ b/drivers/gpu/drm/i915/display/intel_backlight.c @@ -1449,6 +1449,9 @@ bxt_setup_backlight(struct intel_connector *connector, enum pipe unused) static int cnp_num_backlight_controllers(struct drm_i915_private *i915) { + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c index 2edffe62f360c36519b359acfac5eb743a2fa7e5..b0101d72b9c1aed90abb34bde4fc78f76fa2f7cd 100644 --- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c +++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c @@ -39,7 +39,9 @@ static u32 transcoder_to_stream_enc_status(enum transcoder cpu_transcoder) static void intel_dp_hdcp_wait_for_cp_irq(struct intel_connector *connector, int timeout) { - struct intel_hdcp *hdcp = &connector->hdcp; + struct intel_digital_port *dig_port = intel_attached_dig_port(connector); + struct intel_dp *dp = &dig_port->dp; + struct intel_hdcp *hdcp = &dp->attached_connector->hdcp; long ret; #define C (hdcp->cp_irq_count_cached != atomic_read(&hdcp->cp_irq_count)) diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c index 90998b037349d398e338af5a1b01c2ec80d91186..292d163036b12a3817f77f40906854b729a42c92 100644 --- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c +++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c @@ -1658,7 +1658,7 @@ static void skl_wrpll_params_populate(struct skl_wrpll_params *params, } static int -skl_ddi_calculate_wrpll(int clock /* in Hz */, +skl_ddi_calculate_wrpll(int clock, int ref_clock, struct skl_wrpll_params *wrpll_params) { @@ -1683,7 +1683,7 @@ skl_ddi_calculate_wrpll(int clock /* in Hz */, }; unsigned int dco, d, i; unsigned int p0, p1, p2; - u64 afe_clock = clock * 5; /* AFE Clock is 5x Pixel clock */ + u64 afe_clock = (u64)clock * 1000 * 5; /* AFE Clock is 5x Pixel clock, in Hz */ for (d = 0; d < ARRAY_SIZE(dividers); d++) { for (dco = 0; dco < ARRAY_SIZE(dco_central_freq); dco++) { @@ -1808,7 +1808,7 @@ static int skl_ddi_hdmi_pll_dividers(struct intel_crtc_state *crtc_state) struct skl_wrpll_params wrpll_params = {}; int ret; - ret = skl_ddi_calculate_wrpll(crtc_state->port_clock * 1000, + ret = skl_ddi_calculate_wrpll(crtc_state->port_clock, i915->display.dpll.ref_clks.nssc, &wrpll_params); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h index a568a457e53268011a6c932efc7f3dc1b7cb9fb0..f590d7f48ba74fdbc822de1e649180ced766c6de 100644 --- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h +++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h @@ -251,7 +251,7 @@ #define HDCP2_STREAM_STATUS(dev_priv, trans, port) \ (TRANS_HDCP(dev_priv) ? \ TRANS_HDCP2_STREAM_STATUS(trans) : \ - PIPE_HDCP2_STREAM_STATUS(pipe)) + PIPE_HDCP2_STREAM_STATUS(port)) #define _PORTA_HDCP2_AUTH_STREAM 0x66F00 #define _PORTB_HDCP2_AUTH_STREAM 0x66F04 diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c index 42306bc4ba864aa78dfae2a19a55f9e9c4f72834..7ce926241e83a234ed36d62cb8ef6c4710a214f7 100644 --- a/drivers/gpu/drm/i915/display/intel_pps.c +++ b/drivers/gpu/drm/i915/display/intel_pps.c @@ -351,6 +351,9 @@ static int intel_num_pps(struct drm_i915_private *i915) if (IS_GEMINILAKE(i915) || IS_BROXTON(i915)) return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_MTL) + return 2; + if (INTEL_PCH_TYPE(i915) >= PCH_DG1) return 1; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c index a2195e28b625fe3593667315d158eb8abfca43a8..cac6d4184506cc7c289065a37e47e43bf344dae3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c @@ -290,6 +290,41 @@ out: return i915_error_to_vmf_fault(err); } +static void set_address_limits(struct vm_area_struct *area, + struct i915_vma *vma, + unsigned long obj_offset, + unsigned long *start_vaddr, + unsigned long *end_vaddr) +{ + unsigned long vm_start, vm_end, vma_size; /* user's memory parameters */ + long start, end; /* memory boundaries */ + + /* + * Let's move into the ">> PAGE_SHIFT" + * domain to be sure not to lose bits + */ + vm_start = area->vm_start >> PAGE_SHIFT; + vm_end = area->vm_end >> PAGE_SHIFT; + vma_size = vma->size >> PAGE_SHIFT; + + /* + * Calculate the memory boundaries by considering the offset + * provided by the user during memory mapping and the offset + * provided for the partial mapping. + */ + start = vm_start; + start -= obj_offset; + start += vma->gtt_view.partial.offset; + end = start + vma_size; + + start = max_t(long, start, vm_start); + end = min_t(long, end, vm_end); + + /* Let's move back into the "<< PAGE_SHIFT" domain */ + *start_vaddr = (unsigned long)start << PAGE_SHIFT; + *end_vaddr = (unsigned long)end << PAGE_SHIFT; +} + static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) { #define MIN_CHUNK_PAGES (SZ_1M >> PAGE_SHIFT) @@ -302,14 +337,18 @@ static vm_fault_t vm_fault_gtt(struct vm_fault *vmf) struct i915_ggtt *ggtt = to_gt(i915)->ggtt; bool write = area->vm_flags & VM_WRITE; struct i915_gem_ww_ctx ww; + unsigned long obj_offset; + unsigned long start, end; /* memory boundaries */ intel_wakeref_t wakeref; struct i915_vma *vma; pgoff_t page_offset; + unsigned long pfn; int srcu; int ret; - /* We don't use vmf->pgoff since that has the fake offset */ + obj_offset = area->vm_pgoff - drm_vma_node_start(&mmo->vma_node); page_offset = (vmf->address - area->vm_start) >> PAGE_SHIFT; + page_offset += obj_offset; trace_i915_gem_object_fault(obj, page_offset, true, write); @@ -402,12 +441,14 @@ retry: if (ret) goto err_unpin; + set_address_limits(area, vma, obj_offset, &start, &end); + + pfn = (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT; + pfn += (start - area->vm_start) >> PAGE_SHIFT; + pfn += obj_offset - vma->gtt_view.partial.offset; + /* Finally, remap it using the new GTT offset */ - ret = remap_io_mapping(area, - area->vm_start + (vma->gtt_view.partial.offset << PAGE_SHIFT), - (ggtt->gmadr.start + i915_ggtt_offset(vma)) >> PAGE_SHIFT, - min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->iomap); + ret = remap_io_mapping(area, start, pfn, end - start, &ggtt->iomap); if (ret) goto err_fence; @@ -1084,6 +1125,8 @@ int i915_gem_fb_mmap(struct drm_i915_gem_object *obj, struct vm_area_struct *vma mmo = mmap_offset_attach(obj, mmap_type, NULL); if (IS_ERR(mmo)) return PTR_ERR(mmo); + + vma->vm_pgoff += drm_vma_node_start(&mmo->vma_node); } /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c index e6f177183c0fa7abd1e127337ce27a2ded4a5a14..5c72462d1f57e3625f3b8271524a6e3f3f718012 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c @@ -165,7 +165,6 @@ i915_ttm_placement_from_obj(const struct drm_i915_gem_object *obj, i915_ttm_place_from_region(num_allowed ? obj->mm.placements[0] : obj->mm.region, &places[0], obj->bo_offset, obj->base.size, flags); - places[0].flags |= TTM_PL_FLAG_DESIRED; /* Cache this on object? */ for (i = 0; i < num_allowed; ++i) { @@ -779,13 +778,16 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, .interruptible = true, .no_wait_gpu = false, }; - int real_num_busy; + struct ttm_placement initial_placement; + struct ttm_place initial_place; int ret; /* First try only the requested placement. No eviction. */ - real_num_busy = placement->num_placement; - placement->num_placement = 1; - ret = ttm_bo_validate(bo, placement, &ctx); + initial_placement.num_placement = 1; + memcpy(&initial_place, placement->placement, sizeof(struct ttm_place)); + initial_place.flags |= TTM_PL_FLAG_DESIRED; + initial_placement.placement = &initial_place; + ret = ttm_bo_validate(bo, &initial_placement, &ctx); if (ret) { ret = i915_ttm_err_to_gem(ret); /* @@ -800,7 +802,6 @@ static int __i915_ttm_get_pages(struct drm_i915_gem_object *obj, * If the initial attempt fails, allow all accepted placements, * evicting if necessary. */ - placement->num_placement = real_num_busy; ret = ttm_bo_validate(bo, placement, &ctx); if (ret) return i915_ttm_err_to_gem(ret); diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 0b1cd4c7a525f4b53049936f7c31c8a0da8d8509..025a79fe5920e428882d347bc169fb27e48cf409 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -2748,26 +2748,6 @@ oa_configure_all_contexts(struct i915_perf_stream *stream, return 0; } -static int -gen12_configure_all_contexts(struct i915_perf_stream *stream, - const struct i915_oa_config *oa_config, - struct i915_active *active) -{ - struct flex regs[] = { - { - GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE), - CTX_R_PWR_CLK_STATE, - }, - }; - - if (stream->engine->class != RENDER_CLASS) - return 0; - - return oa_configure_all_contexts(stream, - regs, ARRAY_SIZE(regs), - active); -} - static int lrc_configure_all_contexts(struct i915_perf_stream *stream, const struct i915_oa_config *oa_config, @@ -2874,7 +2854,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, { struct drm_i915_private *i915 = stream->perf->i915; struct intel_uncore *uncore = stream->uncore; - struct i915_oa_config *oa_config = stream->oa_config; bool periodic = stream->periodic; u32 period_exponent = stream->period_exponent; u32 sqcnt1; @@ -2918,15 +2897,6 @@ gen12_enable_metric_set(struct i915_perf_stream *stream, intel_uncore_rmw(uncore, GEN12_SQCNT1, 0, sqcnt1); - /* - * Update all contexts prior writing the mux configurations as we need - * to make sure all slices/subslices are ON before writing to NOA - * registers. - */ - ret = gen12_configure_all_contexts(stream, oa_config, active); - if (ret) - return ret; - /* * For Gen12, performance counters are context * saved/restored. Only enable it for the context that @@ -2980,9 +2950,6 @@ static void gen12_disable_metric_set(struct i915_perf_stream *stream) _MASKED_BIT_DISABLE(GEN12_DISABLE_DOP_GATING)); } - /* Reset all contexts' slices/subslices configurations. */ - gen12_configure_all_contexts(stream, NULL, NULL); - /* disable the context save/restore or OAR counters */ if (stream->ctx) gen12_configure_oar_context(stream, NULL); diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c index ae5c6ec24a1e61d4ca463dcd1b7d249e836a6a4b..77b50c56c124ce48831aa2839c9fc45548a86e37 100644 --- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c +++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c @@ -539,8 +539,8 @@ static int mtk_drm_kms_init(struct drm_device *drm) } /* IGT will check if the cursor size is configured */ - drm->mode_config.cursor_width = drm->mode_config.max_width; - drm->mode_config.cursor_height = drm->mode_config.max_height; + drm->mode_config.cursor_width = 512; + drm->mode_config.cursor_height = 512; /* Use OVL device for all DMA memory allocations */ crtc = drm_crtc_from_index(drm, 0); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 1c6626747b98fae4b3373be9bb6c1abf4f80e2bf..ecc3fc5cec22709adfce8ef4cad763e4a62bab5d 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -99,7 +99,7 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname, * was a bad idea, and is only provided for backwards * compatibility for older targets. */ - return -ENODEV; + return -ENOENT; } if (IS_ERR(fw)) { diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c index 34c56e855af778dbf3d26d3343ae1d5b57ac0a37..3b171bf227d16f301545eefeac1e2bf61085b218 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c @@ -1171,8 +1171,6 @@ static void dpu_encoder_virt_atomic_mode_set(struct drm_encoder *drm_enc, cstate->num_mixers = num_lm; - dpu_enc->connector = conn_state->connector; - for (i = 0; i < dpu_enc->num_phys_encs; i++) { struct dpu_encoder_phys *phys = dpu_enc->phys_encs[i]; @@ -1270,6 +1268,8 @@ static void dpu_encoder_virt_atomic_enable(struct drm_encoder *drm_enc, dpu_enc->commit_done_timedout = false; + dpu_enc->connector = drm_atomic_get_new_connector_for_encoder(state, drm_enc); + cur_mode = &dpu_enc->base.crtc->state->adjusted_mode; dpu_enc->wide_bus_en = dpu_encoder_is_widebus_enabled(drm_enc); diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c index fc178ec73907c6b933a6a9588060f23ca3c55298..648c8d0a4c362d46eec87b47b733fc201c925379 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c @@ -308,8 +308,8 @@ static const u32 wb2_formats_rgb_yuv[] = { { \ .maxdwnscale = SSPP_UNITY_SCALE, \ .maxupscale = SSPP_UNITY_SCALE, \ - .format_list = plane_formats_yuv, \ - .num_formats = ARRAY_SIZE(plane_formats_yuv), \ + .format_list = plane_formats, \ + .num_formats = ARRAY_SIZE(plane_formats), \ .virt_format_list = plane_formats, \ .virt_num_formats = ARRAY_SIZE(plane_formats), \ } diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h index e2adc937ea63b23cb3bbb9ca367e8b8290e6d6f1..935ff6fd172c41a2f2c719f3479294214e512aad 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.h @@ -31,24 +31,14 @@ * @fmt: Pointer to format string */ #define DPU_DEBUG(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_KMS)) \ - DRM_DEBUG(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) /** * DPU_DEBUG_DRIVER - macro for hardware driver logging * @fmt: Pointer to format string */ #define DPU_DEBUG_DRIVER(fmt, ...) \ - do { \ - if (drm_debug_enabled(DRM_UT_DRIVER)) \ - DRM_ERROR(fmt, ##__VA_ARGS__); \ - else \ - pr_debug(fmt, ##__VA_ARGS__); \ - } while (0) + DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__) #define DPU_ERROR(fmt, ...) pr_err("[dpu error]" fmt, ##__VA_ARGS__) #define DPU_ERROR_RATELIMITED(fmt, ...) pr_err_ratelimited("[dpu error]" fmt, ##__VA_ARGS__) diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c index 40c4dd2c3139fe7e56361e12135ae299128d139e..29298e0661635923d9ba22357b7d09a309ef7fba 100644 --- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c +++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c @@ -681,6 +681,9 @@ static int dpu_plane_prepare_fb(struct drm_plane *plane, new_state->fb, &layout); if (ret) { DPU_ERROR_PLANE(pdpu, "failed to get format layout, %d\n", ret); + if (pstate->aspace) + msm_framebuffer_cleanup(new_state->fb, pstate->aspace, + pstate->needs_dirtyfb); return ret; } @@ -744,10 +747,9 @@ static int dpu_plane_atomic_check_pipe(struct dpu_plane *pdpu, min_src_size = MSM_FORMAT_IS_YUV(fmt) ? 2 : 1; if (MSM_FORMAT_IS_YUV(fmt) && - (!pipe->sspp->cap->sblk->scaler_blk.len || - !pipe->sspp->cap->sblk->csc_blk.len)) { + !pipe->sspp->cap->sblk->csc_blk.len) { DPU_DEBUG_PLANE(pdpu, - "plane doesn't have scaler/csc for yuv\n"); + "plane doesn't have csc for yuv\n"); return -EINVAL; } @@ -864,6 +866,10 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, max_linewidth = pdpu->catalog->caps->max_linewidth; + drm_rect_rotate(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if ((drm_rect_width(&pipe_cfg->src_rect) > max_linewidth) || _dpu_plane_calc_clk(&crtc_state->adjusted_mode, pipe_cfg) > max_mdp_clk_rate) { /* @@ -913,6 +919,14 @@ static int dpu_plane_atomic_check(struct drm_plane *plane, r_pipe_cfg->dst_rect.x1 = pipe_cfg->dst_rect.x2; } + drm_rect_rotate_inv(&pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + if (r_pipe->sspp) + drm_rect_rotate_inv(&r_pipe_cfg->src_rect, + new_plane_state->fb->width, new_plane_state->fb->height, + new_plane_state->rotation); + ret = dpu_plane_atomic_check_pipe(pdpu, pipe, pipe_cfg, fmt, &crtc_state->adjusted_mode); if (ret) return ret; diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c index 7bc8a9f0657a93f156b8d3b8da8f357d3de7e556..f342fc5ae41ecc7081ed58105c40f8f17339b923 100644 --- a/drivers/gpu/drm/msm/dp/dp_ctrl.c +++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c @@ -1286,6 +1286,8 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl, link_info.rate = ctrl->link->link_params.rate; link_info.capabilities = DP_LINK_CAP_ENHANCED_FRAMING; + dp_link_reset_phy_params_vx_px(ctrl->link); + dp_aux_link_configure(ctrl->aux, &link_info); if (drm_dp_max_downspread(dpcd)) diff --git a/drivers/gpu/drm/msm/dp/dp_panel.c b/drivers/gpu/drm/msm/dp/dp_panel.c index a916b5f3b3170a53796e971b02a00ac1d825b374..6ff6c9ef351ff25a8f8b30dba45bd459ec737d2e 100644 --- a/drivers/gpu/drm/msm/dp/dp_panel.c +++ b/drivers/gpu/drm/msm/dp/dp_panel.c @@ -90,22 +90,22 @@ static int dp_panel_read_dpcd(struct dp_panel *dp_panel) static u32 dp_panel_get_supported_bpp(struct dp_panel *dp_panel, u32 mode_edid_bpp, u32 mode_pclk_khz) { - struct dp_link_info *link_info; + const struct dp_link_info *link_info; const u32 max_supported_bpp = 30, min_supported_bpp = 18; - u32 bpp = 0, data_rate_khz = 0; + u32 bpp, data_rate_khz; - bpp = min_t(u32, mode_edid_bpp, max_supported_bpp); + bpp = min(mode_edid_bpp, max_supported_bpp); link_info = &dp_panel->link_info; data_rate_khz = link_info->num_lanes * link_info->rate * 8; - while (bpp > min_supported_bpp) { + do { if (mode_pclk_khz * bpp <= data_rate_khz) - break; + return bpp; bpp -= 6; - } + } while (bpp > min_supported_bpp); - return bpp; + return min_supported_bpp; } int dp_panel_read_sink_caps(struct dp_panel *dp_panel, @@ -423,8 +423,9 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel) drm_mode->clock); drm_dbg_dp(panel->drm_dev, "bpp = %d\n", dp_panel->dp_mode.bpp); - dp_panel->dp_mode.bpp = max_t(u32, 18, - min_t(u32, dp_panel->dp_mode.bpp, 30)); + dp_panel->dp_mode.bpp = dp_panel_get_mode_bpp(dp_panel, dp_panel->dp_mode.bpp, + dp_panel->dp_mode.drm_mode.clock); + drm_dbg_dp(panel->drm_dev, "updated bpp = %d\n", dp_panel->dp_mode.bpp); diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c index d90b9471ba6ff98e96b8330716814284d53848be..faa88fd6eb4d6aec383a242b66a2b5125c91b3bc 100644 --- a/drivers/gpu/drm/msm/msm_mdss.c +++ b/drivers/gpu/drm/msm/msm_mdss.c @@ -577,7 +577,7 @@ static const struct msm_mdss_data sc7180_data = { .ubwc_enc_version = UBWC_2_0, .ubwc_dec_version = UBWC_2_0, .ubwc_static = 0x1e, - .highest_bank_bit = 0x3, + .highest_bank_bit = 0x1, .reg_bus_bw = 76800, }; diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c index 0712d0b15170260bf53e63831b9638ccab8bd6db..70fb003a66669a1638b22514e7d385494a5d73cc 100644 --- a/drivers/gpu/drm/nouveau/nouveau_bo.c +++ b/drivers/gpu/drm/nouveau/nouveau_bo.c @@ -898,7 +898,7 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, * Without this the operation can timeout and we'll fallback to a * software copy, which might take several minutes to finish. */ - nouveau_fence_wait(fence, false); + nouveau_fence_wait(fence, false, false); ret = ttm_bo_move_accel_cleanup(bo, &fence->base, evict, false, new_reg); nouveau_fence_unref(&fence); diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c index 66fca95c10c761ea597bbb742fc97b2bcd577511..7c97b288680760819971a7823221da5e7179c7ab 100644 --- a/drivers/gpu/drm/nouveau/nouveau_chan.c +++ b/drivers/gpu/drm/nouveau/nouveau_chan.c @@ -72,7 +72,7 @@ nouveau_channel_idle(struct nouveau_channel *chan) ret = nouveau_fence_new(&fence, chan); if (!ret) { - ret = nouveau_fence_wait(fence, false); + ret = nouveau_fence_wait(fence, false, false); nouveau_fence_unref(&fence); } diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c index 6719353e2e13f997cdf9732fc6054b75f49f97ea..6fb65b01d778049f9166681e26a7fdf85891e475 100644 --- a/drivers/gpu/drm/nouveau/nouveau_dmem.c +++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c @@ -128,7 +128,7 @@ static void nouveau_dmem_page_free(struct page *page) static void nouveau_dmem_fence_done(struct nouveau_fence **fence) { if (fence) { - nouveau_fence_wait(*fence, false); + nouveau_fence_wait(*fence, true, false); nouveau_fence_unref(fence); } else { /* diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c index ba469767a20fd9d203c7e2eac56250c80d1de1e8..93f08f9479d89bfda87fbeef246c9dd702f047a1 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.c +++ b/drivers/gpu/drm/nouveau/nouveau_fence.c @@ -311,11 +311,39 @@ nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait) return timeout - t; } +static int +nouveau_fence_wait_busy(struct nouveau_fence *fence, bool intr) +{ + int ret = 0; + + while (!nouveau_fence_done(fence)) { + if (time_after_eq(jiffies, fence->timeout)) { + ret = -EBUSY; + break; + } + + __set_current_state(intr ? + TASK_INTERRUPTIBLE : + TASK_UNINTERRUPTIBLE); + + if (intr && signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + } + + __set_current_state(TASK_RUNNING); + return ret; +} + int -nouveau_fence_wait(struct nouveau_fence *fence, bool intr) +nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr) { long ret; + if (!lazy) + return nouveau_fence_wait_busy(fence, intr); + ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h index 1b63197b744a207030b4e0b9235c72cfbbc93866..8bc065acfe35870f62bd0f2e37df47a35eb8ae38 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fence.h +++ b/drivers/gpu/drm/nouveau/nouveau_fence.h @@ -23,7 +23,7 @@ void nouveau_fence_unref(struct nouveau_fence **); int nouveau_fence_emit(struct nouveau_fence *); bool nouveau_fence_done(struct nouveau_fence *); -int nouveau_fence_wait(struct nouveau_fence *, bool intr); +int nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr); int nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr); struct nouveau_fence_chan { diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index 2e535caa7d6ea2ecc2582755fb1c271f99b94346..5a887d67dc0e8c71cf1987acb68f48f4bbf05d70 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -928,7 +928,7 @@ revalidate: } if (sync) { - if (!(ret = nouveau_fence_wait(fence, false))) { + if (!(ret = nouveau_fence_wait(fence, false, false))) { if ((ret = dma_fence_get_status(&fence->base)) == 1) ret = 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c index b58ab595faf8278c661ce2c9988c09c98f47a515..cd95446d6851113995f8723cdd3a8a022d05f356 100644 --- a/drivers/gpu/drm/nouveau/nouveau_prime.c +++ b/drivers/gpu/drm/nouveau/nouveau_prime.c @@ -64,7 +64,8 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev, * to the caller, instead of a normal nouveau_bo ttm reference. */ ret = drm_gem_object_init(dev, &nvbo->bo.base, size); if (ret) { - nouveau_bo_ref(NULL, &nvbo); + drm_gem_object_release(&nvbo->bo.base); + kfree(nvbo); obj = ERR_PTR(-ENOMEM); goto unlock; } diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c index 9402fa320a7e9d28380da59aef38127b284d696f..48f105239f42d8ffa3cefd253bd83d52dbb3255f 100644 --- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c +++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c @@ -1803,6 +1803,7 @@ nouveau_uvmm_bo_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) { struct nouveau_bo *nvbo = nouveau_gem_object(vm_bo->obj); + nouveau_bo_placement_set(nvbo, nvbo->valid_domains, 0); return nouveau_bo_validate(nvbo, true, false); } diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c index adc60b25f8e6c61e51c7db37805fa24a89378fae..0af01a0ec601686254f62b1e445d1ee7f3644403 100644 --- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c +++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c @@ -205,7 +205,8 @@ nvkm_firmware_dtor(struct nvkm_firmware *fw) break; case NVKM_FIRMWARE_IMG_DMA: nvkm_memory_unref(&memory); - dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys); + dma_free_noncoherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), + fw->img, fw->phys, DMA_TO_DEVICE); break; case NVKM_FIRMWARE_IMG_SGT: nvkm_memory_unref(&memory); @@ -236,10 +237,12 @@ nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name, break; case NVKM_FIRMWARE_IMG_DMA: { dma_addr_t addr; - len = ALIGN(fw->len, PAGE_SIZE); - fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL); + fw->img = dma_alloc_noncoherent(fw->device->dev, + len, &addr, + DMA_TO_DEVICE, + GFP_KERNEL); if (fw->img) { memcpy(fw->img, src, fw->len); fw->phys = addr; diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c index 80a480b12174680b859cd0468eb087c9ffba8055..a1c8545f1249a1ac669ea95b1dfe2afc4dd5a8eb 100644 --- a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c +++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c @@ -89,6 +89,12 @@ nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user, nvkm_falcon_fw_dtor_sigs(fw); } + /* after last write to the img, sync dma mappings */ + dma_sync_single_for_device(fw->fw.device->dev, + fw->fw.phys, + sg_dma_len(&fw->fw.mem.sgl), + DMA_TO_DEVICE); + FLCNFW_DBG(fw, "resetting"); fw->func->reset(fw); diff --git a/drivers/gpu/drm/omapdrm/Kconfig b/drivers/gpu/drm/omapdrm/Kconfig index 3f7139e211d21b955f11720164e0b1b2a93117e3..64e440a2649b01cb1521dc7dd73b011464ac7061 100644 --- a/drivers/gpu/drm/omapdrm/Kconfig +++ b/drivers/gpu/drm/omapdrm/Kconfig @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only config DRM_OMAP tristate "OMAP DRM" + depends on MMU depends on DRM && OF depends on ARCH_OMAP2PLUS || (COMPILE_TEST && PAGE_SIZE_LESS_THAN_64KB) select DRM_KMS_HELPER diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c index 1fe6e0d883c79be07817a3cf582f128c9d5d97dd..e5577d2a19ef497b9da601a42dc241f9a460dd6a 100644 --- a/drivers/gpu/drm/radeon/evergreen_cs.c +++ b/drivers/gpu/drm/radeon/evergreen_cs.c @@ -33,8 +33,10 @@ #include "evergreen_reg_safe.h" #include "cayman_reg_safe.h" +#ifndef MIN #define MAX(a, b) (((a) > (b)) ? (a) : (b)) #define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif #define REG_SAFE_BM_SIZE ARRAY_SIZE(evergreen_reg_safe_bm) diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c index 2241e53a29469090a85891d4ae7f86387c2c0c9b..dec6913cec5b59077123a33e8c886c3fe688ea54 100644 --- a/drivers/gpu/drm/rockchip/inno_hdmi.c +++ b/drivers/gpu/drm/rockchip/inno_hdmi.c @@ -279,7 +279,6 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, const u8 *buffer, size_t len) { struct inno_hdmi *hdmi = connector_to_inno_hdmi(connector); - u8 packed_frame[HDMI_MAXIMUM_INFO_FRAME_SIZE]; ssize_t i; if (type != HDMI_INFOFRAME_TYPE_AVI) { @@ -291,8 +290,7 @@ static int inno_hdmi_upload_frame(struct drm_connector *connector, inno_hdmi_disable_frame(connector, type); for (i = 0; i < len; i++) - hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, - packed_frame[i]); + hdmi_writeb(hdmi, HDMI_CONTROL_PACKET_ADDR + i, buffer[i]); return 0; } diff --git a/drivers/gpu/drm/tests/drm_gem_shmem_test.c b/drivers/gpu/drm/tests/drm_gem_shmem_test.c index c3758faa1b83a36cf76448c339396744109f77ea..d8d0e4d1682fcad3d2156562e52eca89dc4b5e74 100644 --- a/drivers/gpu/drm/tests/drm_gem_shmem_test.c +++ b/drivers/gpu/drm/tests/drm_gem_shmem_test.c @@ -102,6 +102,17 @@ static void drm_gem_shmem_test_obj_create_private(struct kunit *test) sg_init_one(sgt->sgl, buf, TEST_SIZE); + /* + * Set the DMA mask to 64-bits and map the sgtables + * otherwise drm_gem_shmem_free will cause a warning + * on debug kernels. + */ + ret = dma_set_mask(drm_dev->dev, DMA_BIT_MASK(64)); + KUNIT_ASSERT_EQ(test, ret, 0); + + ret = dma_map_sgtable(drm_dev->dev, sgt, DMA_BIDIRECTIONAL, 0); + KUNIT_ASSERT_EQ(test, ret, 0); + /* Init a mock DMA-BUF */ buf_mock.size = TEST_SIZE; attach_mock.dmabuf = &buf_mock; diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h index 49089eefb7c7d1030ac1ad3322b1cfaac79701d0..a0febdb6f21452d97ec44aba8ea07a371aa3edca 100644 --- a/drivers/gpu/drm/v3d/v3d_drv.h +++ b/drivers/gpu/drm/v3d/v3d_drv.h @@ -565,6 +565,10 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo); void v3d_mmu_remove_ptes(struct v3d_bo *bo); /* v3d_sched.c */ +void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count); +void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count); void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue); int v3d_sched_init(struct v3d_dev *v3d); void v3d_sched_fini(struct v3d_dev *v3d); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index 271a6d0f5aca787cc4ece0724c355438e1492e0f..b8682818bafa6cd9b39919447d4ac3ee64ceb625 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -73,24 +73,44 @@ v3d_sched_job_free(struct drm_sched_job *sched_job) v3d_job_cleanup(job); } +void +v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + +void +v3d_performance_query_info_free(struct v3d_performance_query_info *query_info, + unsigned int count) +{ + if (query_info->queries) { + unsigned int i; + + for (i = 0; i < count; i++) + drm_syncobj_put(query_info->queries[i].syncobj); + + kvfree(query_info->queries); + } +} + static void v3d_cpu_job_free(struct drm_sched_job *sched_job) { struct v3d_cpu_job *job = to_cpu_job(sched_job); - struct v3d_timestamp_query_info *timestamp_query = &job->timestamp_query; - struct v3d_performance_query_info *performance_query = &job->performance_query; - if (timestamp_query->queries) { - for (int i = 0; i < timestamp_query->count; i++) - drm_syncobj_put(timestamp_query->queries[i].syncobj); - kvfree(timestamp_query->queries); - } + v3d_timestamp_query_info_free(&job->timestamp_query, + job->timestamp_query.count); - if (performance_query->queries) { - for (int i = 0; i < performance_query->count; i++) - drm_syncobj_put(performance_query->queries[i].syncobj); - kvfree(performance_query->queries); - } + v3d_performance_query_info_free(&job->performance_query, + job->performance_query.count); v3d_job_cleanup(&job->base); } @@ -295,7 +315,7 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) struct v3d_dev *v3d = job->base.v3d; struct drm_device *dev = &v3d->drm; struct dma_fence *fence; - int i, csd_cfg0_reg, csd_cfg_reg_count; + int i, csd_cfg0_reg; v3d->csd_job = job; @@ -315,9 +335,17 @@ v3d_csd_job_run(struct drm_sched_job *sched_job) v3d_switch_perfmon(v3d, &job->base); csd_cfg0_reg = V3D_CSD_QUEUED_CFG0(v3d->ver); - csd_cfg_reg_count = v3d->ver < 71 ? 6 : 7; - for (i = 1; i <= csd_cfg_reg_count; i++) + for (i = 1; i <= 6; i++) V3D_CORE_WRITE(0, csd_cfg0_reg + 4 * i, job->args.cfg[i]); + + /* Although V3D 7.1 has an eighth configuration register, we are not + * using it. Therefore, make sure it remains unused. + * + * XXX: Set the CFG7 register + */ + if (v3d->ver >= 71) + V3D_CORE_WRITE(0, V3D_V7_CSD_QUEUED_CFG7, 0); + /* CFG0 write kicks off the job. */ V3D_CORE_WRITE(0, csd_cfg0_reg, job->args.cfg[0]); diff --git a/drivers/gpu/drm/v3d/v3d_submit.c b/drivers/gpu/drm/v3d/v3d_submit.c index 88f63d526b22365b42b90e90d5b451a56e3fda52..4cdfabbf4964f9b9a71343d3f97b466781773bef 100644 --- a/drivers/gpu/drm/v3d/v3d_submit.c +++ b/drivers/gpu/drm/v3d/v3d_submit.c @@ -452,6 +452,8 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_timestamp_query timestamp; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -480,26 +482,34 @@ v3d_get_cpu_timestamp_query_params(struct drm_file *file_priv, offsets = u64_to_user_ptr(timestamp.offsets); syncs = u64_to_user_ptr(timestamp.syncs); - for (int i = 0; i < timestamp.count; i++) { + for (i = 0; i < timestamp.count; i++) { u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = timestamp.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -509,6 +519,8 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, { u32 __user *syncs; struct drm_v3d_reset_timestamp_query reset; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -533,21 +545,29 @@ v3d_get_cpu_reset_timestamp_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; job->timestamp_query.queries[i].offset = reset.offset + 8 * i; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = reset.count; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } /* Get data for the copy timestamp query results job submission. */ @@ -558,7 +578,8 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, { u32 __user *offsets, *syncs; struct drm_v3d_copy_timestamp_query copy; - int i; + unsigned int i; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -591,18 +612,22 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, u32 offset, sync; if (copy_from_user(&offset, offsets++, sizeof(offset))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].offset = offset; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->timestamp_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->timestamp_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->timestamp_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->timestamp_query.count = copy.count; @@ -613,6 +638,10 @@ v3d_get_cpu_copy_query_results_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_timestamp_query_info_free(&job->timestamp_query, i); + return err; } static int @@ -623,6 +652,8 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_reset_performance_query reset; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -637,6 +668,9 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, if (copy_from_user(&reset, ext, sizeof(reset))) return -EFAULT; + if (reset.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(reset.count, @@ -648,39 +682,47 @@ v3d_get_cpu_reset_performance_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(reset.syncs); kperfmon_ids = u64_to_user_ptr(reset.kperfmon_ids); - for (int i = 0; i < reset.count; i++) { + for (i = 0; i < reset.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < reset.nperfmons; j++) { + for (j = 0; j < reset.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = reset.count; job->performance_query.nperfmons = reset.nperfmons; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } static int @@ -691,6 +733,8 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, u32 __user *syncs; u64 __user *kperfmon_ids; struct drm_v3d_copy_performance_query copy; + unsigned int i, j; + int err; if (!job) { DRM_DEBUG("CPU job extension was attached to a GPU job.\n"); @@ -708,6 +752,9 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, if (copy.pad) return -EINVAL; + if (copy.nperfmons > V3D_MAX_PERFMONS) + return -EINVAL; + job->job_type = V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY; job->performance_query.queries = kvmalloc_array(copy.count, @@ -719,34 +766,38 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, syncs = u64_to_user_ptr(copy.syncs); kperfmon_ids = u64_to_user_ptr(copy.kperfmon_ids); - for (int i = 0; i < copy.count; i++) { + for (i = 0; i < copy.count; i++) { u32 sync; u64 ids; u32 __user *ids_pointer; u32 id; if (copy_from_user(&sync, syncs++, sizeof(sync))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } - job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); - if (copy_from_user(&ids, kperfmon_ids++, sizeof(ids))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } ids_pointer = u64_to_user_ptr(ids); - for (int j = 0; j < copy.nperfmons; j++) { + for (j = 0; j < copy.nperfmons; j++) { if (copy_from_user(&id, ids_pointer++, sizeof(id))) { - kvfree(job->performance_query.queries); - return -EFAULT; + err = -EFAULT; + goto error; } job->performance_query.queries[i].kperfmon_ids[j] = id; } + + job->performance_query.queries[i].syncobj = drm_syncobj_find(file_priv, sync); + if (!job->performance_query.queries[i].syncobj) { + err = -ENOENT; + goto error; + } } job->performance_query.count = copy.count; job->performance_query.nperfmons = copy.nperfmons; @@ -759,6 +810,10 @@ v3d_get_cpu_copy_performance_query_params(struct drm_file *file_priv, job->copy.stride = copy.stride; return 0; + +error: + v3d_performance_query_info_free(&job->performance_query, i); + return err; } /* Whenever userspace sets ioctl extensions, v3d_get_extensions parses data diff --git a/drivers/gpu/drm/virtio/virtgpu_submit.c b/drivers/gpu/drm/virtio/virtgpu_submit.c index 1c7c7f61a222895dc5bcef38d5f82132e818c9a1..7d34cf83f5f2bb6a87612dc277da54750d63371c 100644 --- a/drivers/gpu/drm/virtio/virtgpu_submit.c +++ b/drivers/gpu/drm/virtio/virtgpu_submit.c @@ -48,7 +48,7 @@ struct virtio_gpu_submit { static int virtio_gpu_do_fence_wait(struct virtio_gpu_submit *submit, struct dma_fence *in_fence) { - u32 context = submit->fence_ctx + submit->ring_idx; + u64 context = submit->fence_ctx + submit->ring_idx; if (dma_fence_match_context(in_fence, context)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h index b0d87c5f58d8ed711d2c90965e74bbb9ad3101f1..1ac3cb151b117e8dca872290979b94d0ac3ad51d 100644 --- a/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h +++ b/drivers/gpu/drm/vmwgfx/vmw_surface_cache.h @@ -1,6 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /********************************************************** - * Copyright 2021 VMware, Inc. - * SPDX-License-Identifier: GPL-2.0 OR MIT + * + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -31,6 +33,10 @@ #include +#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) ((svga3d_flags) >> 32) +#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ + ((svga3d_flags) & ((uint64_t)U32_MAX)) + static inline u32 clamped_umul32(u32 a, u32 b) { uint64_t tmp = (uint64_t) a*b; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c index 00144632c600ebb6a63ab80c55eae90eed95375f..f42ebc4a7c22552ef0f518390bd725201ea6c34e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright © 2011-2023 VMware, Inc., Palo Alto, CA., USA - * All Rights Reserved. + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -28,15 +28,39 @@ #include "vmwgfx_bo.h" #include "vmwgfx_drv.h" - +#include "vmwgfx_resource_priv.h" #include static void vmw_bo_release(struct vmw_bo *vbo) { + struct vmw_resource *res; + WARN_ON(vbo->tbo.base.funcs && kref_read(&vbo->tbo.base.refcount) != 0); vmw_bo_unmap(vbo); + + xa_destroy(&vbo->detached_resources); + WARN_ON(vbo->is_dumb && !vbo->dumb_surface); + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + WARN_ON(vbo != res->guest_memory_bo); + WARN_ON(!res->guest_memory_bo); + if (res->guest_memory_bo) { + /* Reserve and switch the backing mob. */ + mutex_lock(&res->dev_priv->cmdbuf_mutex); + (void)vmw_resource_reserve(res, false, true); + vmw_resource_mob_detach(res); + if (res->coherent) + vmw_bo_dirty_release(res->guest_memory_bo); + res->guest_memory_bo = NULL; + res->guest_memory_offset = 0; + vmw_resource_unreserve(res, false, false, false, NULL, + 0); + mutex_unlock(&res->dev_priv->cmdbuf_mutex); + } + vmw_surface_unreference(&vbo->dumb_surface); + } drm_gem_object_release(&vbo->tbo.base); } @@ -325,6 +349,11 @@ void vmw_bo_pin_reserved(struct vmw_bo *vbo, bool pin) * */ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) +{ + return vmw_bo_map_and_cache_size(vbo, vbo->tbo.base.size); +} + +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size) { struct ttm_buffer_object *bo = &vbo->tbo; bool not_used; @@ -335,9 +364,10 @@ void *vmw_bo_map_and_cache(struct vmw_bo *vbo) if (virtual) return virtual; - ret = ttm_bo_kmap(bo, 0, PFN_UP(bo->base.size), &vbo->map); + ret = ttm_bo_kmap(bo, 0, PFN_UP(size), &vbo->map); if (ret) - DRM_ERROR("Buffer object map failed: %d.\n", ret); + DRM_ERROR("Buffer object map failed: %d (size: bo = %zu, map = %zu).\n", + ret, bo->base.size, size); return ttm_kmap_obj_virtual(&vbo->map, ¬_used); } @@ -390,6 +420,7 @@ static int vmw_bo_init(struct vmw_private *dev_priv, BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3); vmw_bo->tbo.priority = 3; vmw_bo->res_tree = RB_ROOT; + xa_init(&vmw_bo->detached_resources); params->size = ALIGN(params->size, PAGE_SIZE); drm_gem_private_object_init(vdev, &vmw_bo->tbo.base, params->size); @@ -654,52 +685,6 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, dma_fence_put(&fence->base); } - -/** - * vmw_dumb_create - Create a dumb kms buffer - * - * @file_priv: Pointer to a struct drm_file identifying the caller. - * @dev: Pointer to the drm device. - * @args: Pointer to a struct drm_mode_create_dumb structure - * Return: Zero on success, negative error code on failure. - * - * This is a driver callback for the core drm create_dumb functionality. - * Note that this is very similar to the vmw_bo_alloc ioctl, except - * that the arguments have a different format. - */ -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args) -{ - struct vmw_private *dev_priv = vmw_priv(dev); - struct vmw_bo *vbo; - int cpp = DIV_ROUND_UP(args->bpp, 8); - int ret; - - switch (cpp) { - case 1: /* DRM_FORMAT_C8 */ - case 2: /* DRM_FORMAT_RGB565 */ - case 4: /* DRM_FORMAT_XRGB8888 */ - break; - default: - /* - * Dumb buffers don't allow anything else. - * This is tested via IGT's dumb_buffers - */ - return -EINVAL; - } - - args->pitch = args->width * cpp; - args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); - - ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, - args->size, &args->handle, - &vbo); - /* drop reference from allocate - handle holds it now */ - drm_gem_object_put(&vbo->tbo.base); - return ret; -} - /** * vmw_bo_swap_notify - swapout notify callback. * @@ -853,3 +838,43 @@ void vmw_bo_placement_set_default_accelerated(struct vmw_bo *bo) vmw_bo_placement_set(bo, domain, domain); } + +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_store(&vbo->detached_resources, (unsigned long)res, res, GFP_KERNEL); +} + +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res) +{ + xa_erase(&vbo->detached_resources, (unsigned long)res); +} + +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo) +{ + unsigned long index; + struct vmw_resource *res = NULL; + struct vmw_surface *surf = NULL; + struct rb_node *rb_itr = vbo->res_tree.rb_node; + + if (vbo->is_dumb && vbo->dumb_surface) { + res = &vbo->dumb_surface->res; + goto out; + } + + xa_for_each(&vbo->detached_resources, index, res) { + if (res->func->res_type == vmw_res_surface) + goto out; + } + + for (rb_itr = rb_first(&vbo->res_tree); rb_itr; + rb_itr = rb_next(rb_itr)) { + res = rb_entry(rb_itr, struct vmw_resource, mob_node); + if (res->func->res_type == vmw_res_surface) + goto out; + } + +out: + if (res) + surf = vmw_res_to_srf(res); + return surf; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h index f349642e6190d6933031d08ccd7f353231f0f1da..62b4342d5f7c5053aaca65487e2607c8ea6949a1 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2023-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -35,11 +36,13 @@ #include #include +#include struct vmw_bo_dirty; struct vmw_fence_obj; struct vmw_private; struct vmw_resource; +struct vmw_surface; enum vmw_bo_domain { VMW_BO_DOMAIN_SYS = BIT(0), @@ -85,11 +88,15 @@ struct vmw_bo { struct rb_root res_tree; u32 res_prios[TTM_MAX_BO_PRIORITY]; + struct xarray detached_resources; atomic_t cpu_writers; /* Not ref-counted. Protected by binding_mutex */ struct vmw_resource *dx_query_ctx; struct vmw_bo_dirty *dirty; + + bool is_dumb; + struct vmw_surface *dumb_surface; }; void vmw_bo_placement_set(struct vmw_bo *bo, u32 domain, u32 busy_domain); @@ -124,15 +131,21 @@ void vmw_bo_fence_single(struct ttm_buffer_object *bo, struct vmw_fence_obj *fence); void *vmw_bo_map_and_cache(struct vmw_bo *vbo); +void *vmw_bo_map_and_cache_size(struct vmw_bo *vbo, size_t size); void vmw_bo_unmap(struct vmw_bo *vbo); void vmw_bo_move_notify(struct ttm_buffer_object *bo, struct ttm_resource *mem); void vmw_bo_swap_notify(struct ttm_buffer_object *bo); +void vmw_bo_add_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +void vmw_bo_del_detached_resource(struct vmw_bo *vbo, struct vmw_resource *res); +struct vmw_surface *vmw_bo_surface(struct vmw_bo *vbo); + int vmw_user_bo_lookup(struct drm_file *filp, u32 handle, struct vmw_bo **out); + /** * vmw_bo_adjust_prio - Adjust the buffer object eviction priority * according to attached resources diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h index a1ce41e1c4684ea41c418265d1e35f7b6fbba54b..32f50e59580972dac68b4212c0a15d1c3eebd90e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -762,6 +763,26 @@ extern int vmw_gmr_bind(struct vmw_private *dev_priv, int gmr_id); extern void vmw_gmr_unbind(struct vmw_private *dev_priv, int gmr_id); +/** + * User handles + */ +struct vmw_user_object { + struct vmw_surface *surface; + struct vmw_bo *buffer; +}; + +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, + u32 handle, struct vmw_user_object *uo); +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo); +void vmw_user_object_unref(struct vmw_user_object *uo); +bool vmw_user_object_is_null(struct vmw_user_object *uo); +struct vmw_surface *vmw_user_object_surface(struct vmw_user_object *uo); +struct vmw_bo *vmw_user_object_buffer(struct vmw_user_object *uo); +void *vmw_user_object_map(struct vmw_user_object *uo); +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size); +void vmw_user_object_unmap(struct vmw_user_object *uo); +bool vmw_user_object_is_mapped(struct vmw_user_object *uo); + /** * Resource utilities - vmwgfx_resource.c */ @@ -776,11 +797,6 @@ extern int vmw_resource_validate(struct vmw_resource *res, bool intr, extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible, bool no_backup); extern bool vmw_resource_needs_backup(const struct vmw_resource *res); -extern int vmw_user_lookup_handle(struct vmw_private *dev_priv, - struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf); extern int vmw_user_resource_lookup_handle( struct vmw_private *dev_priv, struct ttm_object_file *tfile, @@ -1057,9 +1073,6 @@ int vmw_kms_suspend(struct drm_device *dev); int vmw_kms_resume(struct drm_device *dev); void vmw_kms_lost_device(struct drm_device *dev); -int vmw_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args); extern int vmw_resource_pin(struct vmw_resource *res, bool interruptible); extern void vmw_resource_unpin(struct vmw_resource *res); extern enum vmw_res_type vmw_res_type(const struct vmw_resource *res); @@ -1176,6 +1189,15 @@ extern int vmw_gb_surface_reference_ext_ioctl(struct drm_device *dev, int vmw_gb_surface_define(struct vmw_private *dev_priv, const struct vmw_surface_metadata *req, struct vmw_surface **srf_out); +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle); +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); /* * Shader management - vmwgfx_shader.c diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c index 5efc6a766f64e467a68223376b2a97aa62b278e6..588d50ababf604933d68877c516441900dc1f831 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c @@ -32,7 +32,6 @@ #define VMW_FENCE_WRAP (1 << 31) struct vmw_fence_manager { - int num_fence_objects; struct vmw_private *dev_priv; spinlock_t lock; struct list_head fence_list; @@ -124,13 +123,13 @@ static void vmw_fence_obj_destroy(struct dma_fence *f) { struct vmw_fence_obj *fence = container_of(f, struct vmw_fence_obj, base); - struct vmw_fence_manager *fman = fman_from_fence(fence); - spin_lock(&fman->lock); - list_del_init(&fence->head); - --fman->num_fence_objects; - spin_unlock(&fman->lock); + if (!list_empty(&fence->head)) { + spin_lock(&fman->lock); + list_del_init(&fence->head); + spin_unlock(&fman->lock); + } fence->destroy(fence); } @@ -257,7 +256,6 @@ static const struct dma_fence_ops vmw_fence_ops = { .release = vmw_fence_obj_destroy, }; - /* * Execute signal actions on fences recently signaled. * This is done from a workqueue so we don't have to execute @@ -355,7 +353,6 @@ static int vmw_fence_obj_init(struct vmw_fence_manager *fman, goto out_unlock; } list_add_tail(&fence->head, &fman->fence_list); - ++fman->num_fence_objects; out_unlock: spin_unlock(&fman->lock); @@ -403,7 +400,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, u32 passed_seqno) { u32 goal_seqno; - struct vmw_fence_obj *fence; + struct vmw_fence_obj *fence, *next_fence; if (likely(!fman->seqno_valid)) return false; @@ -413,7 +410,7 @@ static bool vmw_fence_goal_new_locked(struct vmw_fence_manager *fman, return false; fman->seqno_valid = false; - list_for_each_entry(fence, &fman->fence_list, head) { + list_for_each_entry_safe(fence, next_fence, &fman->fence_list, head) { if (!list_empty(&fence->seq_passed_actions)) { fman->seqno_valid = true; vmw_fence_goal_write(fman->dev_priv, diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c index 07185c108218fc3b66f7daf620eaaec617138298..b9857f37ca1ac6f81b6285270a63c4de9a0b6f2e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gem.c @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* - * Copyright 2021-2023 VMware, Inc. + * Copyright (c) 2021-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation @@ -78,6 +79,59 @@ static struct sg_table *vmw_gem_object_get_sg_table(struct drm_gem_object *obj) return drm_prime_pages_to_sg(obj->dev, vmw_tt->dma_ttm.pages, vmw_tt->dma_ttm.num_pages); } +static int vmw_gem_vmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(obj); + int ret; + + if (obj->import_attach) { + ret = dma_buf_vmap(obj->import_attach->dmabuf, map); + if (!ret) { + if (drm_WARN_ON(obj->dev, map->is_iomem)) { + dma_buf_vunmap(obj->import_attach->dmabuf, map); + return -EIO; + } + } + } else { + ret = ttm_bo_vmap(bo, map); + } + + return ret; +} + +static void vmw_gem_vunmap(struct drm_gem_object *obj, struct iosys_map *map) +{ + if (obj->import_attach) + dma_buf_vunmap(obj->import_attach->dmabuf, map); + else + drm_gem_ttm_vunmap(obj, map); +} + +static int vmw_gem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) +{ + int ret; + + if (obj->import_attach) { + /* + * Reset both vm_ops and vm_private_data, so we don't end up with + * vm_ops pointing to our implementation if the dma-buf backend + * doesn't set those fields. + */ + vma->vm_private_data = NULL; + vma->vm_ops = NULL; + + ret = dma_buf_mmap(obj->dma_buf, vma, 0); + + /* Drop the reference drm_gem_mmap_obj() acquired.*/ + if (!ret) + drm_gem_object_put(obj); + + return ret; + } + + return drm_gem_ttm_mmap(obj, vma); +} + static const struct vm_operations_struct vmw_vm_ops = { .pfn_mkwrite = vmw_bo_vm_mkwrite, .page_mkwrite = vmw_bo_vm_mkwrite, @@ -94,9 +148,9 @@ static const struct drm_gem_object_funcs vmw_gem_object_funcs = { .pin = vmw_gem_object_pin, .unpin = vmw_gem_object_unpin, .get_sg_table = vmw_gem_object_get_sg_table, - .vmap = drm_gem_ttm_vmap, - .vunmap = drm_gem_ttm_vunmap, - .mmap = drm_gem_ttm_mmap, + .vmap = vmw_gem_vmap, + .vunmap = vmw_gem_vunmap, + .mmap = vmw_gem_mmap, .vm_ops = &vmw_vm_ops, }; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c index 00c4ff684130129d6765dc6457233c32bc4b4228..288ed0bb75cb986c94086323202f18dad3bdb3ee 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -193,13 +194,16 @@ static u32 vmw_du_cursor_mob_size(u32 w, u32 h) */ static u32 *vmw_du_cursor_plane_acquire_image(struct vmw_plane_state *vps) { - if (vps->surf) { - if (vps->surf_mapped) - return vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - return vps->surf->snooper.image; - } else if (vps->bo) - return vmw_bo_map_and_cache(vps->bo); - return NULL; + struct vmw_surface *surf; + + if (vmw_user_object_is_null(&vps->uo)) + return NULL; + + surf = vmw_user_object_surface(&vps->uo); + if (surf && !vmw_user_object_is_mapped(&vps->uo)) + return surf->snooper.image; + + return vmw_user_object_map(&vps->uo); } static bool vmw_du_cursor_plane_has_changed(struct vmw_plane_state *old_vps, @@ -536,22 +540,16 @@ void vmw_du_primary_plane_destroy(struct drm_plane *plane) * vmw_du_plane_unpin_surf - unpins resource associated with a framebuffer surface * * @vps: plane state associated with the display surface - * @unreference: true if we also want to unreference the display. */ -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference) +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps) { - if (vps->surf) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + + if (surf) { if (vps->pinned) { - vmw_resource_unpin(&vps->surf->res); + vmw_resource_unpin(&surf->res); vps->pinned--; } - - if (unreference) { - if (vps->pinned) - DRM_ERROR("Surface still pinned\n"); - vmw_surface_unreference(&vps->surf); - } } } @@ -572,7 +570,7 @@ vmw_du_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - vmw_du_plane_unpin_surf(vps, false); + vmw_du_plane_unpin_surf(vps); } @@ -661,25 +659,14 @@ vmw_du_cursor_plane_cleanup_fb(struct drm_plane *plane, struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } + if (!vmw_user_object_is_null(&vps->uo)) + vmw_user_object_unmap(&vps->uo); vmw_du_cursor_plane_unmap_cm(vps); vmw_du_put_cursor_mob(vcp, vps); - vmw_du_plane_unpin_surf(vps, false); - - if (vps->surf) { - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; - } + vmw_du_plane_unpin_surf(vps); + vmw_user_object_unref(&vps->uo); } @@ -698,64 +685,48 @@ vmw_du_cursor_plane_prepare_fb(struct drm_plane *plane, struct drm_framebuffer *fb = new_state->fb; struct vmw_cursor_plane *vcp = vmw_plane_to_vcp(plane); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); + struct vmw_bo *bo = NULL; int ret = 0; - if (vps->surf) { - if (vps->surf_mapped) { - vmw_bo_unmap(vps->surf->res.guest_memory_bo); - vps->surf_mapped = false; - } - vmw_surface_unreference(&vps->surf); - vps->surf = NULL; - } - - if (vps->bo) { - vmw_bo_unreference(&vps->bo); - vps->bo = NULL; + if (!vmw_user_object_is_null(&vps->uo)) { + vmw_user_object_unmap(&vps->uo); + vmw_user_object_unref(&vps->uo); } if (fb) { if (vmw_framebuffer_to_vfb(fb)->bo) { - vps->bo = vmw_framebuffer_to_vfbd(fb)->buffer; - vmw_bo_reference(vps->bo); + vps->uo.buffer = vmw_framebuffer_to_vfbd(fb)->buffer; + vps->uo.surface = NULL; } else { - vps->surf = vmw_framebuffer_to_vfbs(fb)->surface; - vmw_surface_reference(vps->surf); + memcpy(&vps->uo, &vmw_framebuffer_to_vfbs(fb)->uo, sizeof(vps->uo)); } + vmw_user_object_ref(&vps->uo); } - if (!vps->surf && vps->bo) { - const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { + struct ttm_operation_ctx ctx = {false, false}; - /* - * Not using vmw_bo_map_and_cache() helper here as we need to - * reserve the ttm_buffer_object first which - * vmw_bo_map_and_cache() omits. - */ - ret = ttm_bo_reserve(&vps->bo->tbo, true, false, NULL); - - if (unlikely(ret != 0)) + ret = ttm_bo_reserve(&bo->tbo, true, false, NULL); + if (ret != 0) return -ENOMEM; - ret = ttm_bo_kmap(&vps->bo->tbo, 0, PFN_UP(size), &vps->bo->map); - - ttm_bo_unreserve(&vps->bo->tbo); - - if (unlikely(ret != 0)) + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret != 0) return -ENOMEM; - } else if (vps->surf && !vps->bo && vps->surf->res.guest_memory_bo) { - WARN_ON(vps->surf->snooper.image); - ret = ttm_bo_reserve(&vps->surf->res.guest_memory_bo->tbo, true, false, - NULL); - if (unlikely(ret != 0)) - return -ENOMEM; - vmw_bo_map_and_cache(vps->surf->res.guest_memory_bo); - ttm_bo_unreserve(&vps->surf->res.guest_memory_bo->tbo); - vps->surf_mapped = true; + vmw_bo_pin_reserved(bo, true); + if (vmw_framebuffer_to_vfb(fb)->bo) { + const u32 size = new_state->crtc_w * new_state->crtc_h * sizeof(u32); + + (void)vmw_bo_map_and_cache_size(bo, size); + } else { + vmw_bo_map_and_cache(bo); + } + ttm_bo_unreserve(&bo->tbo); } - if (vps->surf || vps->bo) { + if (!vmw_user_object_is_null(&vps->uo)) { vmw_du_get_cursor_mob(vcp, vps); vmw_du_cursor_plane_map_cm(vps); } @@ -777,14 +748,17 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_plane_state *vps = vmw_plane_state_to_vps(new_state); struct vmw_plane_state *old_vps = vmw_plane_state_to_vps(old_state); + struct vmw_bo *old_bo = NULL; + struct vmw_bo *new_bo = NULL; s32 hotspot_x, hotspot_y; + int ret; hotspot_x = du->hotspot_x + new_state->hotspot_x; hotspot_y = du->hotspot_y + new_state->hotspot_y; - du->cursor_surface = vps->surf; + du->cursor_surface = vmw_user_object_surface(&vps->uo); - if (!vps->surf && !vps->bo) { + if (vmw_user_object_is_null(&vps->uo)) { vmw_cursor_update_position(dev_priv, false, 0, 0); return; } @@ -792,10 +766,26 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, vps->cursor.hotspot_x = hotspot_x; vps->cursor.hotspot_y = hotspot_y; - if (vps->surf) { + if (du->cursor_surface) du->cursor_age = du->cursor_surface->snooper.age; + + if (!vmw_user_object_is_null(&old_vps->uo)) { + old_bo = vmw_user_object_buffer(&old_vps->uo); + ret = ttm_bo_reserve(&old_bo->tbo, false, false, NULL); + if (ret != 0) + return; } + if (!vmw_user_object_is_null(&vps->uo)) { + new_bo = vmw_user_object_buffer(&vps->uo); + if (old_bo != new_bo) { + ret = ttm_bo_reserve(&new_bo->tbo, false, false, NULL); + if (ret != 0) + return; + } else { + new_bo = NULL; + } + } if (!vmw_du_cursor_plane_has_changed(old_vps, vps)) { /* * If it hasn't changed, avoid making the device do extra @@ -813,6 +803,11 @@ vmw_du_cursor_plane_atomic_update(struct drm_plane *plane, hotspot_x, hotspot_y); } + if (old_bo) + ttm_bo_unreserve(&old_bo->tbo); + if (new_bo) + ttm_bo_unreserve(&new_bo->tbo); + du->cursor_x = new_state->crtc_x + du->set_gui_x; du->cursor_y = new_state->crtc_y + du->set_gui_y; @@ -913,7 +908,7 @@ int vmw_du_cursor_plane_atomic_check(struct drm_plane *plane, } if (!vmw_framebuffer_to_vfb(fb)->bo) { - surface = vmw_framebuffer_to_vfbs(fb)->surface; + surface = vmw_user_object_surface(&vmw_framebuffer_to_vfbs(fb)->uo); WARN_ON(!surface); @@ -1074,12 +1069,7 @@ vmw_du_plane_duplicate_state(struct drm_plane *plane) memset(&vps->cursor, 0, sizeof(vps->cursor)); /* Each ref counted resource needs to be acquired again */ - if (vps->surf) - (void) vmw_surface_reference(vps->surf); - - if (vps->bo) - (void) vmw_bo_reference(vps->bo); - + vmw_user_object_ref(&vps->uo); state = &vps->base; __drm_atomic_helper_plane_duplicate_state(plane, state); @@ -1128,11 +1118,7 @@ vmw_du_plane_destroy_state(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(state); /* Should have been freed by cleanup_fb */ - if (vps->surf) - vmw_surface_unreference(&vps->surf); - - if (vps->bo) - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); drm_atomic_helper_plane_destroy_state(plane, state); } @@ -1227,7 +1213,7 @@ static void vmw_framebuffer_surface_destroy(struct drm_framebuffer *framebuffer) vmw_framebuffer_to_vfbs(framebuffer); drm_framebuffer_cleanup(framebuffer); - vmw_surface_unreference(&vfbs->surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); } @@ -1272,29 +1258,41 @@ int vmw_kms_readback(struct vmw_private *dev_priv, return -ENOSYS; } +static int vmw_framebuffer_surface_create_handle(struct drm_framebuffer *fb, + struct drm_file *file_priv, + unsigned int *handle) +{ + struct vmw_framebuffer_surface *vfbs = vmw_framebuffer_to_vfbs(fb); + struct vmw_bo *bo = vmw_user_object_buffer(&vfbs->uo); + + return drm_gem_handle_create(file_priv, &bo->tbo.base, handle); +} static const struct drm_framebuffer_funcs vmw_framebuffer_surface_funcs = { + .create_handle = vmw_framebuffer_surface_create_handle, .destroy = vmw_framebuffer_surface_destroy, .dirty = drm_atomic_helper_dirtyfb, }; static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, - struct vmw_surface *surface, + struct vmw_user_object *uo, struct vmw_framebuffer **out, const struct drm_mode_fb_cmd2 - *mode_cmd, - bool is_bo_proxy) + *mode_cmd) { struct drm_device *dev = &dev_priv->drm; struct vmw_framebuffer_surface *vfbs; enum SVGA3dSurfaceFormat format; + struct vmw_surface *surface; int ret; /* 3D is only supported on HWv8 and newer hosts */ if (dev_priv->active_display_unit == vmw_du_legacy) return -ENOSYS; + surface = vmw_user_object_surface(uo); + /* * Sanity checks. */ @@ -1357,8 +1355,8 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, } drm_helper_mode_fill_fb_struct(dev, &vfbs->base.base, mode_cmd); - vfbs->surface = vmw_surface_reference(surface); - vfbs->is_bo_proxy = is_bo_proxy; + memcpy(&vfbs->uo, uo, sizeof(vfbs->uo)); + vmw_user_object_ref(&vfbs->uo); *out = &vfbs->base; @@ -1370,7 +1368,7 @@ static int vmw_kms_new_framebuffer_surface(struct vmw_private *dev_priv, return 0; out_err2: - vmw_surface_unreference(&surface); + vmw_user_object_unref(&vfbs->uo); kfree(vfbs); out_err1: return ret; @@ -1386,7 +1384,6 @@ static int vmw_framebuffer_bo_create_handle(struct drm_framebuffer *fb, { struct vmw_framebuffer_bo *vfbd = vmw_framebuffer_to_vfbd(fb); - return drm_gem_handle_create(file_priv, &vfbd->buffer->tbo.base, handle); } @@ -1407,86 +1404,6 @@ static const struct drm_framebuffer_funcs vmw_framebuffer_bo_funcs = { .dirty = drm_atomic_helper_dirtyfb, }; -/** - * vmw_create_bo_proxy - create a proxy surface for the buffer object - * - * @dev: DRM device - * @mode_cmd: parameters for the new surface - * @bo_mob: MOB backing the buffer object - * @srf_out: newly created surface - * - * When the content FB is a buffer object, we create a surface as a proxy to the - * same buffer. This way we can do a surface copy rather than a surface DMA. - * This is a more efficient approach - * - * RETURNS: - * 0 on success, error code otherwise - */ -static int vmw_create_bo_proxy(struct drm_device *dev, - const struct drm_mode_fb_cmd2 *mode_cmd, - struct vmw_bo *bo_mob, - struct vmw_surface **srf_out) -{ - struct vmw_surface_metadata metadata = {0}; - uint32_t format; - struct vmw_resource *res; - unsigned int bytes_pp; - int ret; - - switch (mode_cmd->pixel_format) { - case DRM_FORMAT_ARGB8888: - case DRM_FORMAT_XRGB8888: - format = SVGA3D_X8R8G8B8; - bytes_pp = 4; - break; - - case DRM_FORMAT_RGB565: - case DRM_FORMAT_XRGB1555: - format = SVGA3D_R5G6B5; - bytes_pp = 2; - break; - - case 8: - format = SVGA3D_P8; - bytes_pp = 1; - break; - - default: - DRM_ERROR("Invalid framebuffer format %p4cc\n", - &mode_cmd->pixel_format); - return -EINVAL; - } - - metadata.format = format; - metadata.mip_levels[0] = 1; - metadata.num_sizes = 1; - metadata.base_size.width = mode_cmd->pitches[0] / bytes_pp; - metadata.base_size.height = mode_cmd->height; - metadata.base_size.depth = 1; - metadata.scanout = true; - - ret = vmw_gb_surface_define(vmw_priv(dev), &metadata, srf_out); - if (ret) { - DRM_ERROR("Failed to allocate proxy content buffer\n"); - return ret; - } - - res = &(*srf_out)->res; - - /* Reserve and switch the backing mob. */ - mutex_lock(&res->dev_priv->cmdbuf_mutex); - (void) vmw_resource_reserve(res, false, true); - vmw_user_bo_unref(&res->guest_memory_bo); - res->guest_memory_bo = vmw_user_bo_ref(bo_mob); - res->guest_memory_offset = 0; - vmw_resource_unreserve(res, false, false, false, NULL, 0); - mutex_unlock(&res->dev_priv->cmdbuf_mutex); - - return 0; -} - - - static int vmw_kms_new_framebuffer_bo(struct vmw_private *dev_priv, struct vmw_bo *bo, struct vmw_framebuffer **out, @@ -1565,55 +1482,24 @@ vmw_kms_srf_ok(struct vmw_private *dev_priv, uint32_t width, uint32_t height) * vmw_kms_new_framebuffer - Create a new framebuffer. * * @dev_priv: Pointer to device private struct. - * @bo: Pointer to buffer object to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @surface: Pointer to a surface to wrap the kms framebuffer around. - * Either @bo or @surface must be NULL. - * @only_2d: No presents will occur to this buffer object based framebuffer. - * This helps the code to do some important optimizations. + * @uo: Pointer to user object to wrap the kms framebuffer around. + * Either the buffer or surface inside the user object must be NULL. * @mode_cmd: Frame-buffer metadata. */ struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd) { struct vmw_framebuffer *vfb = NULL; - bool is_bo_proxy = false; int ret; - /* - * We cannot use the SurfaceDMA command in an non-accelerated VM, - * therefore, wrap the buffer object in a surface so we can use the - * SurfaceCopy command. - */ - if (vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height) && - bo && only_2d && - mode_cmd->width > 64 && /* Don't create a proxy for cursor */ - dev_priv->active_display_unit == vmw_du_screen_target) { - ret = vmw_create_bo_proxy(&dev_priv->drm, mode_cmd, - bo, &surface); - if (ret) - return ERR_PTR(ret); - - is_bo_proxy = true; - } - /* Create the new framebuffer depending one what we have */ - if (surface) { - ret = vmw_kms_new_framebuffer_surface(dev_priv, surface, &vfb, - mode_cmd, - is_bo_proxy); - /* - * vmw_create_bo_proxy() adds a reference that is no longer - * needed - */ - if (is_bo_proxy) - vmw_surface_unreference(&surface); - } else if (bo) { - ret = vmw_kms_new_framebuffer_bo(dev_priv, bo, &vfb, + if (vmw_user_object_surface(uo)) { + ret = vmw_kms_new_framebuffer_surface(dev_priv, uo, &vfb, + mode_cmd); + } else if (uo->buffer) { + ret = vmw_kms_new_framebuffer_bo(dev_priv, uo->buffer, &vfb, mode_cmd); } else { BUG(); @@ -1635,14 +1521,12 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, { struct vmw_private *dev_priv = vmw_priv(dev); struct vmw_framebuffer *vfb = NULL; - struct vmw_surface *surface = NULL; - struct vmw_bo *bo = NULL; + struct vmw_user_object uo = {0}; int ret; /* returns either a bo or surface */ - ret = vmw_user_lookup_handle(dev_priv, file_priv, - mode_cmd->handles[0], - &surface, &bo); + ret = vmw_user_object_lookup(dev_priv, file_priv, mode_cmd->handles[0], + &uo); if (ret) { DRM_ERROR("Invalid buffer object handle %u (0x%x).\n", mode_cmd->handles[0], mode_cmd->handles[0]); @@ -1650,7 +1534,7 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - if (!bo && + if (vmw_user_object_surface(&uo) && !vmw_kms_srf_ok(dev_priv, mode_cmd->width, mode_cmd->height)) { DRM_ERROR("Surface size cannot exceed %dx%d\n", dev_priv->texture_max_width, @@ -1659,20 +1543,15 @@ static struct drm_framebuffer *vmw_kms_fb_create(struct drm_device *dev, } - vfb = vmw_kms_new_framebuffer(dev_priv, bo, surface, - !(dev_priv->capabilities & SVGA_CAP_3D), - mode_cmd); + vfb = vmw_kms_new_framebuffer(dev_priv, &uo, mode_cmd); if (IS_ERR(vfb)) { ret = PTR_ERR(vfb); goto err_out; } err_out: - /* vmw_user_lookup_handle takes one ref so does new_fb */ - if (bo) - vmw_user_bo_unref(&bo); - if (surface) - vmw_surface_unreference(&surface); + /* vmw_user_object_lookup takes one ref so does new_fb */ + vmw_user_object_unref(&uo); if (ret) { DRM_ERROR("failed to create vmw_framebuffer: %i\n", ret); @@ -2584,72 +2463,6 @@ void vmw_kms_helper_validation_finish(struct vmw_private *dev_priv, vmw_fence_obj_unreference(&fence); } -/** - * vmw_kms_update_proxy - Helper function to update a proxy surface from - * its backing MOB. - * - * @res: Pointer to the surface resource - * @clips: Clip rects in framebuffer (surface) space. - * @num_clips: Number of clips in @clips. - * @increment: Integer with which to increment the clip counter when looping. - * Used to skip a predetermined number of clip rects. - * - * This function makes sure the proxy surface is updated from its backing MOB - * using the region given by @clips. The surface resource @res and its backing - * MOB needs to be reserved and validated on call. - */ -int vmw_kms_update_proxy(struct vmw_resource *res, - const struct drm_clip_rect *clips, - unsigned num_clips, - int increment) -{ - struct vmw_private *dev_priv = res->dev_priv; - struct drm_vmw_size *size = &vmw_res_to_srf(res)->metadata.base_size; - struct { - SVGA3dCmdHeader header; - SVGA3dCmdUpdateGBImage body; - } *cmd; - SVGA3dBox *box; - size_t copy_size = 0; - int i; - - if (!clips) - return 0; - - cmd = VMW_CMD_RESERVE(dev_priv, sizeof(*cmd) * num_clips); - if (!cmd) - return -ENOMEM; - - for (i = 0; i < num_clips; ++i, clips += increment, ++cmd) { - box = &cmd->body.box; - - cmd->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd->header.size = sizeof(cmd->body); - cmd->body.image.sid = res->id; - cmd->body.image.face = 0; - cmd->body.image.mipmap = 0; - - if (clips->x1 > size->width || clips->x2 > size->width || - clips->y1 > size->height || clips->y2 > size->height) { - DRM_ERROR("Invalid clips outsize of framebuffer.\n"); - return -EINVAL; - } - - box->x = clips->x1; - box->y = clips->y1; - box->z = 0; - box->w = clips->x2 - clips->x1; - box->h = clips->y2 - clips->y1; - box->d = 1; - - copy_size += sizeof(*cmd); - } - - vmw_cmd_commit(dev_priv, copy_size); - - return 0; -} - /** * vmw_kms_create_implicit_placement_property - Set up the implicit placement * property. @@ -2784,8 +2597,9 @@ int vmw_du_helper_plane_update(struct vmw_du_update_plane *update) } else { struct vmw_framebuffer_surface *vfbs = container_of(update->vfb, typeof(*vfbs), base); + struct vmw_surface *surf = vmw_user_object_surface(&vfbs->uo); - ret = vmw_validation_add_resource(&val_ctx, &vfbs->surface->res, + ret = vmw_validation_add_resource(&val_ctx, &surf->res, 0, VMW_RES_DIRTY_NONE, NULL, NULL); } @@ -2941,3 +2755,93 @@ int vmw_connector_get_modes(struct drm_connector *connector) return num_modes; } + +struct vmw_user_object *vmw_user_object_ref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_ref(uo->buffer); + else if (uo->surface) + vmw_surface_reference(uo->surface); + return uo; +} + +void vmw_user_object_unref(struct vmw_user_object *uo) +{ + if (uo->buffer) + vmw_user_bo_unref(&uo->buffer); + else if (uo->surface) + vmw_surface_unreference(&uo->surface); +} + +struct vmw_bo * +vmw_user_object_buffer(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer; + else if (uo->surface) + return uo->surface->res.guest_memory_bo; + return NULL; +} + +struct vmw_surface * +vmw_user_object_surface(struct vmw_user_object *uo) +{ + if (uo->buffer) + return uo->buffer->dumb_surface; + return uo->surface; +} + +void *vmw_user_object_map(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache(bo); +} + +void *vmw_user_object_map_size(struct vmw_user_object *uo, size_t size) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + + WARN_ON(!bo); + return vmw_bo_map_and_cache_size(bo, size); +} + +void vmw_user_object_unmap(struct vmw_user_object *uo) +{ + struct vmw_bo *bo = vmw_user_object_buffer(uo); + int ret; + + WARN_ON(!bo); + + /* Fence the mob creation so we are guarateed to have the mob */ + ret = ttm_bo_reserve(&bo->tbo, false, false, NULL); + if (ret != 0) + return; + + vmw_bo_unmap(bo); + vmw_bo_pin_reserved(bo, false); + + ttm_bo_unreserve(&bo->tbo); +} + +bool vmw_user_object_is_mapped(struct vmw_user_object *uo) +{ + struct vmw_bo *bo; + + if (!uo || vmw_user_object_is_null(uo)) + return false; + + bo = vmw_user_object_buffer(uo); + + if (WARN_ON(!bo)) + return false; + + WARN_ON(bo->map.bo && !bo->map.virtual); + return bo->map.virtual; +} + +bool vmw_user_object_is_null(struct vmw_user_object *uo) +{ + return !uo->buffer && !uo->surface; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h index bf24f2f0dcfc931fc9788e30c8245f12ed1472e3..6141fadf81efeb2cda7d91274dce16def626e46d 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR MIT */ /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -221,11 +222,9 @@ struct vmw_framebuffer { struct vmw_framebuffer_surface { struct vmw_framebuffer base; - struct vmw_surface *surface; - bool is_bo_proxy; /* true if this is proxy surface for DMA buf */ + struct vmw_user_object uo; }; - struct vmw_framebuffer_bo { struct vmw_framebuffer base; struct vmw_bo *buffer; @@ -277,8 +276,7 @@ struct vmw_cursor_plane_state { */ struct vmw_plane_state { struct drm_plane_state base; - struct vmw_surface *surf; - struct vmw_bo *bo; + struct vmw_user_object uo; int content_fb_type; unsigned long bo_size; @@ -457,9 +455,7 @@ int vmw_kms_readback(struct vmw_private *dev_priv, uint32_t num_clips); struct vmw_framebuffer * vmw_kms_new_framebuffer(struct vmw_private *dev_priv, - struct vmw_bo *bo, - struct vmw_surface *surface, - bool only_2d, + struct vmw_user_object *uo, const struct drm_mode_fb_cmd2 *mode_cmd); void vmw_guess_mode_timing(struct drm_display_mode *mode); void vmw_kms_update_implicit_fb(struct vmw_private *dev_priv); @@ -486,8 +482,7 @@ void vmw_du_plane_reset(struct drm_plane *plane); struct drm_plane_state *vmw_du_plane_duplicate_state(struct drm_plane *plane); void vmw_du_plane_destroy_state(struct drm_plane *plane, struct drm_plane_state *state); -void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps, - bool unreference); +void vmw_du_plane_unpin_surf(struct vmw_plane_state *vps); int vmw_du_crtc_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 5befc2719a4984a0f8d5cb31d6dd855a4f38521e..39949e0a493f3daa88b1f784c3d0f44f894b0557 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -147,8 +148,9 @@ static int vmw_ldu_fb_pin(struct vmw_framebuffer *vfb) struct vmw_bo *buf; int ret; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); if (!buf) return 0; @@ -169,8 +171,10 @@ static int vmw_ldu_fb_unpin(struct vmw_framebuffer *vfb) struct vmw_private *dev_priv = vmw_priv(vfb->base.dev); struct vmw_bo *buf; - buf = vfb->bo ? vmw_framebuffer_to_vfbd(&vfb->base)->buffer : - vmw_framebuffer_to_vfbs(&vfb->base)->surface->res.guest_memory_bo; + buf = vfb->bo ? + vmw_framebuffer_to_vfbd(&vfb->base)->buffer : + vmw_user_object_buffer(&vmw_framebuffer_to_vfbs(&vfb->base)->uo); + if (WARN_ON(!buf)) return 0; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c index c45b4724e4141dbfc07df9554c09917b8c9ec9ec..e20f64b67b2669ba8b818a431a57a6a927496732 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_overlay.c @@ -92,7 +92,7 @@ static int vmw_overlay_send_put(struct vmw_private *dev_priv, { struct vmw_escape_video_flush *flush; size_t fifo_size; - bool have_so = (dev_priv->active_display_unit == vmw_du_screen_object); + bool have_so = (dev_priv->active_display_unit != vmw_du_legacy); int i, num_items; SVGAGuestPtr ptr; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c index c99cad444991579f6e665453b74f56cb35de2e15..598b90ac7590b52b4b0e2db6d7e21ca17564eef4 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2013 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2013-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -31,6 +32,7 @@ */ #include "vmwgfx_drv.h" +#include "vmwgfx_bo.h" #include "ttm_object.h" #include @@ -88,13 +90,35 @@ int vmw_prime_handle_to_fd(struct drm_device *dev, uint32_t handle, uint32_t flags, int *prime_fd) { + struct vmw_private *vmw = vmw_priv(dev); struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo; int ret; + int surf_handle; - if (handle > VMWGFX_NUM_MOB) + if (handle > VMWGFX_NUM_MOB) { ret = ttm_prime_handle_to_fd(tfile, handle, flags, prime_fd); - else - ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, flags, prime_fd); + } else { + ret = vmw_user_bo_lookup(file_priv, handle, &vbo); + if (ret) + return ret; + if (vbo && vbo->is_dumb) { + ret = drm_gem_prime_handle_to_fd(dev, file_priv, handle, + flags, prime_fd); + } else { + surf_handle = vmw_lookup_surface_handle_for_buffer(vmw, + vbo, + handle); + if (surf_handle > 0) + ret = ttm_prime_handle_to_fd(tfile, surf_handle, + flags, prime_fd); + else + ret = drm_gem_prime_handle_to_fd(dev, file_priv, + handle, flags, + prime_fd); + } + vmw_user_bo_unref(&vbo); + } return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c index 848dba09981b0d102edcc1288974f994e72b505b..a73af8a355fbf5086df5e41f330bd106ab8be61c 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -58,6 +59,7 @@ void vmw_resource_mob_attach(struct vmw_resource *res) rb_link_node(&res->mob_node, parent, new); rb_insert_color(&res->mob_node, &gbo->res_tree); + vmw_bo_del_detached_resource(gbo, res); vmw_bo_prio_add(gbo, res->used_prio); } @@ -287,28 +289,35 @@ out_bad_resource: * * The pointer this pointed at by out_surf and out_buf needs to be null. */ -int vmw_user_lookup_handle(struct vmw_private *dev_priv, +int vmw_user_object_lookup(struct vmw_private *dev_priv, struct drm_file *filp, - uint32_t handle, - struct vmw_surface **out_surf, - struct vmw_bo **out_buf) + u32 handle, + struct vmw_user_object *uo) { struct ttm_object_file *tfile = vmw_fpriv(filp)->tfile; struct vmw_resource *res; int ret; - BUG_ON(*out_surf || *out_buf); + WARN_ON(uo->surface || uo->buffer); ret = vmw_user_resource_lookup_handle(dev_priv, tfile, handle, user_surface_converter, &res); if (!ret) { - *out_surf = vmw_res_to_srf(res); + uo->surface = vmw_res_to_srf(res); return 0; } - *out_surf = NULL; - ret = vmw_user_bo_lookup(filp, handle, out_buf); + uo->surface = NULL; + ret = vmw_user_bo_lookup(filp, handle, &uo->buffer); + if (!ret && !uo->buffer->is_dumb) { + uo->surface = vmw_lookup_surface_for_buffer(dev_priv, + uo->buffer, + handle); + if (uo->surface) + vmw_user_bo_unref(&uo->buffer); + } + return ret; } diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index df0039a8ef29a21d7cb6b6063f62a4876a25a446..0f4bfd98480afe0ce116ecba4fa9a37c139ad6cf 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2011-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2011-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -240,7 +241,7 @@ static void vmw_sou_crtc_mode_set_nofb(struct drm_crtc *crtc) struct vmw_connector_state *vmw_conn_state; int x, y; - sou->buffer = vps->bo; + sou->buffer = vmw_user_object_buffer(&vps->uo); conn_state = sou->base.connector.state; vmw_conn_state = vmw_connector_state_to_vcs(conn_state); @@ -376,10 +377,11 @@ vmw_sou_primary_plane_cleanup_fb(struct drm_plane *plane, struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); struct drm_crtc *crtc = plane->state->crtc ? plane->state->crtc : old_state->crtc; + struct vmw_bo *bo = vmw_user_object_buffer(&vps->uo); - if (vps->bo) - vmw_bo_unpin(vmw_priv(crtc->dev), vps->bo, false); - vmw_bo_unreference(&vps->bo); + if (bo) + vmw_bo_unpin(vmw_priv(crtc->dev), bo, false); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; vmw_du_plane_cleanup_fb(plane, old_state); @@ -411,9 +413,10 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, .bo_type = ttm_bo_type_device, .pin = true }; + struct vmw_bo *bo = NULL; if (!new_fb) { - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; return 0; @@ -422,17 +425,17 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, bo_params.size = new_state->crtc_w * new_state->crtc_h * 4; dev_priv = vmw_priv(crtc->dev); - if (vps->bo) { + bo = vmw_user_object_buffer(&vps->uo); + if (bo) { if (vps->bo_size == bo_params.size) { /* * Note that this might temporarily up the pin-count * to 2, until cleanup_fb() is called. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, - true); + return vmw_bo_pin_in_vram(dev_priv, bo, true); } - vmw_bo_unreference(&vps->bo); + vmw_user_object_unref(&vps->uo); vps->bo_size = 0; } @@ -442,7 +445,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * resume the overlays, this is preferred to failing to alloc. */ vmw_overlay_pause_all(dev_priv); - ret = vmw_bo_create(dev_priv, &bo_params, &vps->bo); + ret = vmw_gem_object_create(dev_priv, &bo_params, &vps->uo.buffer); vmw_overlay_resume_all(dev_priv); if (ret) return ret; @@ -453,7 +456,7 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, * TTM already thinks the buffer is pinned, but make sure the * pin_count is upped. */ - return vmw_bo_pin_in_vram(dev_priv, vps->bo, true); + return vmw_bo_pin_in_vram(dev_priv, vps->uo.buffer, true); } static uint32_t vmw_sou_bo_fifo_size(struct vmw_du_update_plane *update, @@ -580,6 +583,7 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, { struct vmw_kms_sou_dirty_cmd *blit = cmd; struct vmw_framebuffer_surface *vfbs; + struct vmw_surface *surf = NULL; vfbs = container_of(update->vfb, typeof(*vfbs), base); @@ -587,7 +591,8 @@ static uint32_t vmw_sou_surface_pre_clip(struct vmw_du_update_plane *update, blit->header.size = sizeof(blit->body) + sizeof(SVGASignedRect) * num_hits; - blit->body.srcImage.sid = vfbs->surface->res.id; + surf = vmw_user_object_surface(&vfbs->uo); + blit->body.srcImage.sid = surf->res.id; blit->body.destScreenId = update->du->unit; /* Update the source and destination bounding box later in post_clip */ @@ -1104,7 +1109,7 @@ int vmw_kms_sou_do_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c index a04e0736318da6c09876ec4fdfe02ae05c3836a7..5453f7cf0e2d7b05e1020743dc18dcd766617308 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_stdu.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /****************************************************************************** * - * COPYRIGHT (C) 2014-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2014-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -29,6 +30,7 @@ #include "vmwgfx_kms.h" #include "vmwgfx_vkms.h" #include "vmw_surface_cache.h" +#include #include #include @@ -735,7 +737,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, int ret; if (!srf) - srf = &vfbs->surface->res; + srf = &vmw_user_object_surface(&vfbs->uo)->res; ret = vmw_validation_add_resource(&val_ctx, srf, 0, VMW_RES_DIRTY_NONE, NULL, NULL); @@ -746,12 +748,6 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, if (ret) goto out_unref; - if (vfbs->is_bo_proxy) { - ret = vmw_kms_update_proxy(srf, clips, num_clips, inc); - if (ret) - goto out_finish; - } - sdirty.base.fifo_commit = vmw_kms_stdu_surface_fifo_commit; sdirty.base.clip = vmw_kms_stdu_surface_clip; sdirty.base.fifo_reserve_size = sizeof(struct vmw_stdu_surface_copy) + @@ -765,7 +761,7 @@ int vmw_kms_stdu_surface_dirty(struct vmw_private *dev_priv, ret = vmw_kms_helper_dirty(dev_priv, framebuffer, clips, vclips, dest_x, dest_y, num_clips, inc, &sdirty.base); -out_finish: + vmw_kms_helper_validation_finish(dev_priv, NULL, &val_ctx, out_fence, NULL); @@ -877,6 +873,32 @@ vmw_stdu_connector_mode_valid(struct drm_connector *connector, return MODE_OK; } +/* + * Trigger a modeset if the X,Y position of the Screen Target changes. + * This is needed when multi-mon is cycled. The original Screen Target will have + * the same mode but its relative X,Y position in the topology will change. + */ +static int vmw_stdu_connector_atomic_check(struct drm_connector *conn, + struct drm_atomic_state *state) +{ + struct drm_connector_state *conn_state; + struct vmw_screen_target_display_unit *du; + struct drm_crtc_state *new_crtc_state; + + conn_state = drm_atomic_get_connector_state(state, conn); + du = vmw_connector_to_stdu(conn); + + if (!conn_state->crtc) + return 0; + + new_crtc_state = drm_atomic_get_new_crtc_state(state, conn_state->crtc); + if (du->base.gui_x != du->base.set_gui_x || + du->base.gui_y != du->base.set_gui_y) + new_crtc_state->mode_changed = true; + + return 0; +} + static const struct drm_connector_funcs vmw_stdu_connector_funcs = { .dpms = vmw_du_connector_dpms, .detect = vmw_du_connector_detect, @@ -891,7 +913,8 @@ static const struct drm_connector_funcs vmw_stdu_connector_funcs = { static const struct drm_connector_helper_funcs vmw_stdu_connector_helper_funcs = { .get_modes = vmw_connector_get_modes, - .mode_valid = vmw_stdu_connector_mode_valid + .mode_valid = vmw_stdu_connector_mode_valid, + .atomic_check = vmw_stdu_connector_atomic_check, }; @@ -918,9 +941,8 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, { struct vmw_plane_state *vps = vmw_plane_state_to_vps(old_state); - if (vps->surf) + if (vmw_user_object_surface(&vps->uo)) WARN_ON(!vps->pinned); - vmw_du_plane_cleanup_fb(plane, old_state); vps->content_fb_type = SAME_AS_DISPLAY; @@ -928,7 +950,6 @@ vmw_stdu_primary_plane_cleanup_fb(struct drm_plane *plane, } - /** * vmw_stdu_primary_plane_prepare_fb - Readies the display surface * @@ -952,13 +973,15 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, enum stdu_content_type new_content_type; struct vmw_framebuffer_surface *new_vfbs; uint32_t hdisplay = new_state->crtc_w, vdisplay = new_state->crtc_h; + struct drm_plane_state *old_state = plane->state; + struct drm_rect rect; int ret; /* No FB to prepare */ if (!new_fb) { - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } return 0; @@ -968,8 +991,8 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, new_vfbs = (vfb->bo) ? NULL : vmw_framebuffer_to_vfbs(new_fb); if (new_vfbs && - new_vfbs->surface->metadata.base_size.width == hdisplay && - new_vfbs->surface->metadata.base_size.height == vdisplay) + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.width == hdisplay && + vmw_user_object_surface(&new_vfbs->uo)->metadata.base_size.height == vdisplay) new_content_type = SAME_AS_DISPLAY; else if (vfb->bo) new_content_type = SEPARATE_BO; @@ -1007,29 +1030,29 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, metadata.num_sizes = 1; metadata.scanout = true; } else { - metadata = new_vfbs->surface->metadata; + metadata = vmw_user_object_surface(&new_vfbs->uo)->metadata; } metadata.base_size.width = hdisplay; metadata.base_size.height = vdisplay; metadata.base_size.depth = 1; - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { struct drm_vmw_size cur_base_size = - vps->surf->metadata.base_size; + vmw_user_object_surface(&vps->uo)->metadata.base_size; if (cur_base_size.width != metadata.base_size.width || cur_base_size.height != metadata.base_size.height || - vps->surf->metadata.format != metadata.format) { + vmw_user_object_surface(&vps->uo)->metadata.format != metadata.format) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } } - if (!vps->surf) { + if (!vmw_user_object_surface(&vps->uo)) { ret = vmw_gb_surface_define(dev_priv, &metadata, - &vps->surf); + &vps->uo.surface); if (ret != 0) { DRM_ERROR("Couldn't allocate STDU surface.\n"); return ret; @@ -1042,18 +1065,19 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, * The only time we add a reference in prepare_fb is if the * state object doesn't have a reference to begin with */ - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { WARN_ON(vps->pinned != 0); - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); } - vps->surf = vmw_surface_reference(new_vfbs->surface); + memcpy(&vps->uo, &new_vfbs->uo, sizeof(vps->uo)); + vmw_user_object_ref(&vps->uo); } - if (vps->surf) { + if (vmw_user_object_surface(&vps->uo)) { /* Pin new surface before flipping */ - ret = vmw_resource_pin(&vps->surf->res, false); + ret = vmw_resource_pin(&vmw_user_object_surface(&vps->uo)->res, false); if (ret) goto out_srf_unref; @@ -1062,6 +1086,34 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, vps->content_fb_type = new_content_type; + /* + * The drm fb code will do blit's via the vmap interface, which doesn't + * trigger vmw_bo page dirty tracking due to being kernel side (and thus + * doesn't require mmap'ing) so we have to update the surface's dirty + * regions by hand but we want to be careful to not overwrite the + * resource if it has been written to by the gpu (res_dirty). + */ + if (vps->uo.buffer && vps->uo.buffer->is_dumb) { + struct vmw_surface *surf = vmw_user_object_surface(&vps->uo); + struct vmw_resource *res = &surf->res; + + if (!res->res_dirty && drm_atomic_helper_damage_merged(old_state, + new_state, + &rect)) { + /* + * At some point it might be useful to actually translate + * (rect.x1, rect.y1) => start, and (rect.x2, rect.y2) => end, + * but currently the fb code will just report the entire fb + * dirty so in practice it doesn't matter. + */ + pgoff_t start = res->guest_memory_offset >> PAGE_SHIFT; + pgoff_t end = __KERNEL_DIV_ROUND_UP(res->guest_memory_offset + + res->guest_memory_size, + PAGE_SIZE); + vmw_resource_dirty_update(res, start, end); + } + } + /* * This should only happen if the buffer object is too large to create a * proxy surface for. @@ -1072,7 +1124,7 @@ vmw_stdu_primary_plane_prepare_fb(struct drm_plane *plane, return 0; out_srf_unref: - vmw_surface_unreference(&vps->surf); + vmw_user_object_unref(&vps->uo); return ret; } @@ -1214,14 +1266,8 @@ static uint32_t vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_update); return size; @@ -1230,61 +1276,14 @@ vmw_stdu_surface_fifo_size_same_display(struct vmw_du_update_plane *update, static uint32_t vmw_stdu_surface_fifo_size(struct vmw_du_update_plane *update, uint32_t num_hits) { - struct vmw_framebuffer_surface *vfbs; uint32_t size = 0; - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - if (vfbs->is_bo_proxy) - size += sizeof(struct vmw_stdu_update_gb_image) * num_hits; - size += sizeof(struct vmw_stdu_surface_copy) + sizeof(SVGA3dCopyBox) * num_hits + sizeof(struct vmw_stdu_update); return size; } -static uint32_t -vmw_stdu_surface_update_proxy(struct vmw_du_update_plane *update, void *cmd) -{ - struct vmw_framebuffer_surface *vfbs; - struct drm_plane_state *state = update->plane->state; - struct drm_plane_state *old_state = update->old_state; - struct vmw_stdu_update_gb_image *cmd_update = cmd; - struct drm_atomic_helper_damage_iter iter; - struct drm_rect clip; - uint32_t copy_size = 0; - - vfbs = container_of(update->vfb, typeof(*vfbs), base); - - /* - * proxy surface is special where a buffer object type fb is wrapped - * in a surface and need an update gb image command to sync with device. - */ - drm_atomic_helper_damage_iter_init(&iter, old_state, state); - drm_atomic_for_each_plane_damage(&iter, &clip) { - SVGA3dBox *box = &cmd_update->body.box; - - cmd_update->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE; - cmd_update->header.size = sizeof(cmd_update->body); - cmd_update->body.image.sid = vfbs->surface->res.id; - cmd_update->body.image.face = 0; - cmd_update->body.image.mipmap = 0; - - box->x = clip.x1; - box->y = clip.y1; - box->z = 0; - box->w = drm_rect_width(&clip); - box->h = drm_rect_height(&clip); - box->d = 1; - - copy_size += sizeof(*cmd_update); - cmd_update++; - } - - return copy_size; -} - static uint32_t vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, uint32_t num_hits) @@ -1299,7 +1298,7 @@ vmw_stdu_surface_populate_copy(struct vmw_du_update_plane *update, void *cmd, cmd_copy->header.id = SVGA_3D_CMD_SURFACE_COPY; cmd_copy->header.size = sizeof(cmd_copy->body) + sizeof(SVGA3dCopyBox) * num_hits; - cmd_copy->body.src.sid = vfbs->surface->res.id; + cmd_copy->body.src.sid = vmw_user_object_surface(&vfbs->uo)->res.id; cmd_copy->body.dest.sid = stdu->display_srf->res.id; return sizeof(*cmd_copy); @@ -1370,10 +1369,7 @@ static int vmw_stdu_plane_update_surface(struct vmw_private *dev_priv, srf_update.mutex = &dev_priv->cmdbuf_mutex; srf_update.intr = true; - if (vfbs->is_bo_proxy) - srf_update.post_prepare = vmw_stdu_surface_update_proxy; - - if (vfbs->surface->res.id != stdu->display_srf->res.id) { + if (vmw_user_object_surface(&vfbs->uo)->res.id != stdu->display_srf->res.id) { srf_update.calc_fifo_size = vmw_stdu_surface_fifo_size; srf_update.pre_clip = vmw_stdu_surface_populate_copy; srf_update.clip = vmw_stdu_surface_populate_clip; @@ -1417,7 +1413,7 @@ vmw_stdu_primary_plane_atomic_update(struct drm_plane *plane, stdu = vmw_crtc_to_stdu(crtc); dev_priv = vmw_priv(crtc->dev); - stdu->display_srf = vps->surf; + stdu->display_srf = vmw_user_object_surface(&vps->uo); stdu->content_fb_type = vps->content_fb_type; stdu->cpp = vps->cpp; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c index e7a744dfcecfd4837a0b50aaa449f329990356c8..8ae6a761c900345d1711e99c298784c1e8aefc71 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c @@ -1,7 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR MIT /************************************************************************** * - * Copyright 2009-2023 VMware, Inc., Palo Alto, CA., USA + * Copyright (c) 2009-2024 Broadcom. All Rights Reserved. The term + * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -36,9 +37,6 @@ #include #define SVGA3D_FLAGS_64(upper32, lower32) (((uint64_t)upper32 << 32) | lower32) -#define SVGA3D_FLAGS_UPPER_32(svga3d_flags) (svga3d_flags >> 32) -#define SVGA3D_FLAGS_LOWER_32(svga3d_flags) \ - (svga3d_flags & ((uint64_t)U32_MAX)) /** * struct vmw_user_surface - User-space visible surface resource @@ -686,6 +684,14 @@ static void vmw_user_surface_base_release(struct ttm_base_object **p_base) struct vmw_resource *res = &user_srf->srf.res; *p_base = NULL; + + /* + * Dumb buffers own the resource and they'll unref the + * resource themselves + */ + if (res && res->guest_memory_bo && res->guest_memory_bo->is_dumb) + return; + vmw_resource_unreference(&res); } @@ -812,7 +818,8 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, } } res->guest_memory_size = cur_bo_offset; - if (metadata->scanout && + if (!file_priv->atomic && + metadata->scanout && metadata->num_sizes == 1 && metadata->sizes[0].width == VMW_CURSOR_SNOOP_WIDTH && metadata->sizes[0].height == VMW_CURSOR_SNOOP_HEIGHT && @@ -864,6 +871,7 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data, vmw_resource_unreference(&res); goto out_unlock; } + vmw_bo_add_detached_resource(res->guest_memory_bo, res); } tmp = vmw_resource_reference(&srf->res); @@ -892,6 +900,113 @@ out_unlock: return ret; } +static struct vmw_user_surface * +vmw_lookup_user_surface_for_buffer(struct vmw_private *vmw, struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = NULL; + struct vmw_surface *surf; + struct ttm_base_object *base; + + surf = vmw_bo_surface(bo); + if (surf) { + rcu_read_lock(); + user_srf = container_of(surf, struct vmw_user_surface, srf); + base = &user_srf->prime.base; + if (base && !kref_get_unless_zero(&base->refcount)) { + drm_dbg_driver(&vmw->drm, + "%s: referencing a stale surface handle %d\n", + __func__, handle); + base = NULL; + user_srf = NULL; + } + rcu_read_unlock(); + } + + return user_srf; +} + +struct vmw_surface *vmw_lookup_surface_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + struct vmw_surface *surf = NULL; + struct ttm_base_object *base; + + if (user_srf) { + surf = vmw_surface_reference(&user_srf->srf); + base = &user_srf->prime.base; + ttm_base_object_unref(&base); + } + return surf; +} + +u32 vmw_lookup_surface_handle_for_buffer(struct vmw_private *vmw, + struct vmw_bo *bo, + u32 handle) +{ + struct vmw_user_surface *user_srf = + vmw_lookup_user_surface_for_buffer(vmw, bo, handle); + int surf_handle = 0; + struct ttm_base_object *base; + + if (user_srf) { + base = &user_srf->prime.base; + surf_handle = (u32)base->handle; + ttm_base_object_unref(&base); + } + return surf_handle; +} + +static int vmw_buffer_prime_to_surface_base(struct vmw_private *dev_priv, + struct drm_file *file_priv, + u32 fd, u32 *handle, + struct ttm_base_object **base_p) +{ + struct ttm_base_object *base; + struct vmw_bo *bo; + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_user_surface *user_srf; + int ret; + + ret = drm_gem_prime_fd_to_handle(&dev_priv->drm, file_priv, fd, handle); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to find user buffer for fd = %u.\n", fd); + return ret; + } + + ret = vmw_user_bo_lookup(file_priv, *handle, &bo); + if (ret) { + drm_warn(&dev_priv->drm, + "Wasn't able to lookup user buffer for handle = %u.\n", *handle); + return ret; + } + + user_srf = vmw_lookup_user_surface_for_buffer(dev_priv, bo, *handle); + if (WARN_ON(!user_srf)) { + drm_warn(&dev_priv->drm, + "User surface fd %d (handle %d) is null.\n", fd, *handle); + ret = -EINVAL; + goto out; + } + + base = &user_srf->prime.base; + ret = ttm_ref_object_add(tfile, base, NULL, false); + if (ret) { + drm_warn(&dev_priv->drm, + "Couldn't add an object ref for the buffer (%d).\n", *handle); + goto out; + } + + *base_p = base; +out: + vmw_user_bo_unref(&bo); + + return ret; +} static int vmw_surface_handle_reference(struct vmw_private *dev_priv, @@ -901,15 +1016,19 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv, struct ttm_base_object **base_p) { struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; - struct vmw_user_surface *user_srf; + struct vmw_user_surface *user_srf = NULL; uint32_t handle; struct ttm_base_object *base; int ret; if (handle_type == DRM_VMW_HANDLE_PRIME) { ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle); - if (unlikely(ret != 0)) - return ret; + if (ret) + return vmw_buffer_prime_to_surface_base(dev_priv, + file_priv, + u_handle, + &handle, + base_p); } else { handle = u_handle; } @@ -1503,7 +1622,12 @@ vmw_gb_surface_define_internal(struct drm_device *dev, ret = vmw_user_bo_lookup(file_priv, req->base.buffer_handle, &res->guest_memory_bo); if (ret == 0) { - if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { + if (res->guest_memory_bo->is_dumb) { + VMW_DEBUG_USER("Can't backup surface with a dumb buffer.\n"); + vmw_user_bo_unref(&res->guest_memory_bo); + ret = -EINVAL; + goto out_unlock; + } else if (res->guest_memory_bo->tbo.base.size < res->guest_memory_size) { VMW_DEBUG_USER("Surface backup buffer too small.\n"); vmw_user_bo_unref(&res->guest_memory_bo); ret = -EINVAL; @@ -1560,6 +1684,7 @@ vmw_gb_surface_define_internal(struct drm_device *dev, rep->handle = user_srf->prime.base.handle; rep->backup_size = res->guest_memory_size; if (res->guest_memory_bo) { + vmw_bo_add_detached_resource(res->guest_memory_bo, res); rep->buffer_map_handle = drm_vma_node_offset_addr(&res->guest_memory_bo->tbo.base.vma_node); rep->buffer_size = res->guest_memory_bo->tbo.base.size; @@ -2100,3 +2225,140 @@ int vmw_gb_surface_define(struct vmw_private *dev_priv, out_unlock: return ret; } + +static SVGA3dSurfaceFormat vmw_format_bpp_to_svga(struct vmw_private *vmw, + int bpp) +{ + switch (bpp) { + case 8: /* DRM_FORMAT_C8 */ + return SVGA3D_P8; + case 16: /* DRM_FORMAT_RGB565 */ + return SVGA3D_R5G6B5; + case 32: /* DRM_FORMAT_XRGB8888 */ + if (has_sm4_context(vmw)) + return SVGA3D_B8G8R8X8_UNORM; + return SVGA3D_X8R8G8B8; + default: + drm_warn(&vmw->drm, "Unsupported format bpp: %d\n", bpp); + return SVGA3D_X8R8G8B8; + } +} + +/** + * vmw_dumb_create - Create a dumb kms buffer + * + * @file_priv: Pointer to a struct drm_file identifying the caller. + * @dev: Pointer to the drm device. + * @args: Pointer to a struct drm_mode_create_dumb structure + * Return: Zero on success, negative error code on failure. + * + * This is a driver callback for the core drm create_dumb functionality. + * Note that this is very similar to the vmw_bo_alloc ioctl, except + * that the arguments have a different format. + */ +int vmw_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args) +{ + struct vmw_private *dev_priv = vmw_priv(dev); + struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile; + struct vmw_bo *vbo = NULL; + struct vmw_resource *res = NULL; + union drm_vmw_gb_surface_create_ext_arg arg = { 0 }; + struct drm_vmw_gb_surface_create_ext_req *req = &arg.req; + int ret; + struct drm_vmw_size drm_size = { + .width = args->width, + .height = args->height, + .depth = 1, + }; + SVGA3dSurfaceFormat format = vmw_format_bpp_to_svga(dev_priv, args->bpp); + const struct SVGA3dSurfaceDesc *desc = vmw_surface_get_desc(format); + SVGA3dSurfaceAllFlags flags = SVGA3D_SURFACE_HINT_TEXTURE | + SVGA3D_SURFACE_HINT_RENDERTARGET | + SVGA3D_SURFACE_SCREENTARGET | + SVGA3D_SURFACE_BIND_SHADER_RESOURCE | + SVGA3D_SURFACE_BIND_RENDER_TARGET; + + /* + * Without mob support we're just going to use raw memory buffer + * because we wouldn't be able to support full surface coherency + * without mobs + */ + if (!dev_priv->has_mob) { + int cpp = DIV_ROUND_UP(args->bpp, 8); + + switch (cpp) { + case 1: /* DRM_FORMAT_C8 */ + case 2: /* DRM_FORMAT_RGB565 */ + case 4: /* DRM_FORMAT_XRGB8888 */ + break; + default: + /* + * Dumb buffers don't allow anything else. + * This is tested via IGT's dumb_buffers + */ + return -EINVAL; + } + + args->pitch = args->width * cpp; + args->size = ALIGN(args->pitch * args->height, PAGE_SIZE); + + ret = vmw_gem_object_create_with_handle(dev_priv, file_priv, + args->size, &args->handle, + &vbo); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_put(&vbo->tbo.base); + return ret; + } + + req->version = drm_vmw_gb_surface_v1; + req->multisample_pattern = SVGA3D_MS_PATTERN_NONE; + req->quality_level = SVGA3D_MS_QUALITY_NONE; + req->buffer_byte_stride = 0; + req->must_be_zero = 0; + req->base.svga3d_flags = SVGA3D_FLAGS_LOWER_32(flags); + req->svga3d_flags_upper_32_bits = SVGA3D_FLAGS_UPPER_32(flags); + req->base.format = (uint32_t)format; + req->base.drm_surface_flags = drm_vmw_surface_flag_scanout; + req->base.drm_surface_flags |= drm_vmw_surface_flag_shareable; + req->base.drm_surface_flags |= drm_vmw_surface_flag_create_buffer; + req->base.drm_surface_flags |= drm_vmw_surface_flag_coherent; + req->base.base_size.width = args->width; + req->base.base_size.height = args->height; + req->base.base_size.depth = 1; + req->base.array_size = 0; + req->base.mip_levels = 1; + req->base.multisample_count = 0; + req->base.buffer_handle = SVGA3D_INVALID_ID; + req->base.autogen_filter = SVGA3D_TEX_FILTER_NONE; + ret = vmw_gb_surface_define_ext_ioctl(dev, &arg, file_priv); + if (ret) { + drm_warn(dev, "Unable to create a dumb buffer\n"); + return ret; + } + + args->handle = arg.rep.buffer_handle; + args->size = arg.rep.buffer_size; + args->pitch = vmw_surface_calculate_pitch(desc, &drm_size); + + ret = vmw_user_resource_lookup_handle(dev_priv, tfile, arg.rep.handle, + user_surface_converter, + &res); + if (ret) { + drm_err(dev, "Created resource handle doesn't exist!\n"); + goto err; + } + + vbo = res->guest_memory_bo; + vbo->is_dumb = true; + vbo->dumb_surface = vmw_res_to_srf(res); + +err: + if (res) + vmw_resource_unreference(&res); + if (ret) + ttm_ref_object_base_unref(tfile, arg.rep.handle); + + return ret; +} diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c index 3bfcf671fcd58303e1cbbfbfa86375afae4256ef..8651b788e98bc41f9dbba4d68f56e734defdf76e 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_vkms.c @@ -75,7 +75,7 @@ done: return ret; } -static int +static void compute_crc(struct drm_crtc *crtc, struct vmw_surface *surf, u32 *crc) @@ -101,8 +101,6 @@ compute_crc(struct drm_crtc *crtc, } vmw_bo_unmap(bo); - - return 0; } static void @@ -116,7 +114,6 @@ crc_generate_worker(struct work_struct *work) u64 frame_start, frame_end; u32 crc32 = 0; struct vmw_surface *surf = 0; - int ret; spin_lock_irq(&du->vkms.crc_state_lock); crc_pending = du->vkms.crc_pending; @@ -130,22 +127,24 @@ crc_generate_worker(struct work_struct *work) return; spin_lock_irq(&du->vkms.crc_state_lock); - surf = du->vkms.surface; + surf = vmw_surface_reference(du->vkms.surface); spin_unlock_irq(&du->vkms.crc_state_lock); - if (vmw_surface_sync(vmw, surf)) { - drm_warn(crtc->dev, "CRC worker wasn't able to sync the crc surface!\n"); - return; - } + if (surf) { + if (vmw_surface_sync(vmw, surf)) { + drm_warn( + crtc->dev, + "CRC worker wasn't able to sync the crc surface!\n"); + return; + } - ret = compute_crc(crtc, surf, &crc32); - if (ret) - return; + compute_crc(crtc, surf, &crc32); + vmw_surface_unreference(&surf); + } spin_lock_irq(&du->vkms.crc_state_lock); frame_start = du->vkms.frame_start; frame_end = du->vkms.frame_end; - crc_pending = du->vkms.crc_pending; du->vkms.frame_start = 0; du->vkms.frame_end = 0; du->vkms.crc_pending = false; @@ -164,7 +163,7 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) struct vmw_display_unit *du = container_of(timer, struct vmw_display_unit, vkms.timer); struct drm_crtc *crtc = &du->crtc; struct vmw_private *vmw = vmw_priv(crtc->dev); - struct vmw_surface *surf = NULL; + bool has_surface = false; u64 ret_overrun; bool locked, ret; @@ -179,10 +178,10 @@ vmw_vkms_vblank_simulate(struct hrtimer *timer) WARN_ON(!ret); if (!locked) return HRTIMER_RESTART; - surf = du->vkms.surface; + has_surface = du->vkms.surface != NULL; vmw_vkms_unlock(crtc); - if (du->vkms.crc_enabled && surf) { + if (du->vkms.crc_enabled && has_surface) { u64 frame = drm_crtc_accurate_vblank_count(crtc); spin_lock(&du->vkms.crc_state_lock); @@ -336,6 +335,8 @@ vmw_vkms_crtc_cleanup(struct drm_crtc *crtc) { struct vmw_display_unit *du = vmw_crtc_to_du(crtc); + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); WARN_ON(work_pending(&du->vkms.crc_generator_work)); hrtimer_cancel(&du->vkms.timer); } @@ -497,9 +498,12 @@ vmw_vkms_set_crc_surface(struct drm_crtc *crtc, struct vmw_display_unit *du = vmw_crtc_to_du(crtc); struct vmw_private *vmw = vmw_priv(crtc->dev); - if (vmw->vkms_enabled) { + if (vmw->vkms_enabled && du->vkms.surface != surf) { WARN_ON(atomic_read(&du->vkms.atomic_lock) != VMW_VKMS_LOCK_MODESET); - du->vkms.surface = surf; + if (du->vkms.surface) + vmw_surface_unreference(&du->vkms.surface); + if (surf) + du->vkms.surface = vmw_surface_reference(surf); } } diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile index 628c245c4822ebff7494e4764c93556eecf6cd8e..e97c9da451b36cfd93dcbf04a489ebcc839dabb2 100644 --- a/drivers/gpu/drm/xe/Makefile +++ b/drivers/gpu/drm/xe/Makefile @@ -25,12 +25,14 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \ uses_generated_oob := \ $(obj)/xe_ggtt.o \ + $(obj)/xe_device.o \ $(obj)/xe_gsc.o \ $(obj)/xe_gt.o \ $(obj)/xe_guc.o \ $(obj)/xe_guc_ads.o \ $(obj)/xe_guc_pc.o \ $(obj)/xe_migrate.o \ + $(obj)/xe_pat.o \ $(obj)/xe_ring_ops.o \ $(obj)/xe_vm.o \ $(obj)/xe_wa.o \ diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c index 8b83dcff72e17a41d7695c76c8f8c8a9d5508294..49de4e4f8a75b01b370a699bfd38e1308bafd7d2 100644 --- a/drivers/gpu/drm/xe/display/xe_display.c +++ b/drivers/gpu/drm/xe/display/xe_display.c @@ -132,6 +132,7 @@ static void xe_display_fini_noirq(void *arg) return; intel_display_driver_remove_noirq(xe); + intel_opregion_cleanup(xe); } int xe_display_init_noirq(struct xe_device *xe) @@ -157,8 +158,10 @@ int xe_display_init_noirq(struct xe_device *xe) intel_display_device_info_runtime_init(xe); err = intel_display_driver_probe_noirq(xe); - if (err) + if (err) { + intel_opregion_cleanup(xe); return err; + } return devm_add_action_or_reset(xe->drm.dev, xe_display_fini_noirq, xe); } @@ -280,6 +283,27 @@ static bool suspend_to_idle(void) return false; } +static void xe_display_flush_cleanup_work(struct xe_device *xe) +{ + struct intel_crtc *crtc; + + for_each_intel_crtc(&xe->drm, crtc) { + struct drm_crtc_commit *commit; + + spin_lock(&crtc->base.commit_lock); + commit = list_first_entry_or_null(&crtc->base.commit_list, + struct drm_crtc_commit, commit_entry); + if (commit) + drm_crtc_commit_get(commit); + spin_unlock(&crtc->base.commit_lock); + + if (commit) { + wait_for_completion(&commit->cleanup_done); + drm_crtc_commit_put(commit); + } + } +} + void xe_display_pm_suspend(struct xe_device *xe, bool runtime) { bool s2idle = suspend_to_idle(); @@ -297,6 +321,8 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime) if (!runtime) intel_display_driver_suspend(xe); + xe_display_flush_cleanup_work(xe); + intel_dp_mst_suspend(xe); intel_hpd_cancel_work(xe); diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c index 9e860c61f4b33f7ed19a579ab47406d6ec47ffb9..ccd0d87d438a3af400afb1b1d089f4f46b6fefcb 100644 --- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c +++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c @@ -7,6 +7,8 @@ #include "intel_display_types.h" #include "intel_dsb_buffer.h" #include "xe_bo.h" +#include "xe_device.h" +#include "xe_device_types.h" #include "xe_gt.h" u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) @@ -16,7 +18,10 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf) void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val); + xe_device_l2_flush(xe); } u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) @@ -26,9 +31,12 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx) void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size) { + struct xe_device *xe = dsb_buf->vma->bo->tile->xe; + WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf)); iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size); + xe_device_l2_flush(xe); } bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size) diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c index 423f367c7065ed0386664b91c44a017aa22e96b6..d7db44e79eaf55f5227563c634dcb97e7735a6aa 100644 --- a/drivers/gpu/drm/xe/display/xe_fb_pin.c +++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c @@ -10,6 +10,7 @@ #include "intel_fb.h" #include "intel_fb_pin.h" #include "xe_bo.h" +#include "xe_device.h" #include "xe_ggtt.h" #include "xe_gt.h" #include "xe_pm.h" @@ -304,6 +305,8 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb, if (ret) goto err_unpin; + /* Ensure DPT writes are flushed */ + xe_device_l2_flush(xe); return vma; err_unpin: diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index d44564bad00949ae6b37fe7ece8162ddb62cc80a..3c28650400586281ebec4c3ea147827c769c465a 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -80,6 +80,9 @@ #define LE_CACHEABILITY_MASK REG_GENMASK(1, 0) #define LE_CACHEABILITY(value) REG_FIELD_PREP(LE_CACHEABILITY_MASK, value) +#define XE2_GAMREQSTRM_CTRL XE_REG(0x4194) +#define CG_DIS_CNTLBUS REG_BIT(6) + #define CCS_AUX_INV XE_REG(0x4208) #define VD0_AUX_INV XE_REG(0x4218) @@ -372,6 +375,11 @@ #define XEHPC_L3CLOS_MASK(i) XE_REG_MCR(0xb194 + (i) * 8) +#define XE2_GLOBAL_INVAL XE_REG(0xb404) + +#define SCRATCH1LPFC XE_REG(0xb474) +#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) + #define XE2LPM_L3SQCREG5 XE_REG_MCR(0xb658) #define XE2_TDF_CTRL XE_REG(0xb418) @@ -429,6 +437,7 @@ #define DIS_FIX_EOT1_FLUSH REG_BIT(9) #define TDL_TSL_CHICKEN XE_REG_MCR(0xe4c4, XE_REG_OPTION_MASKED) +#define STK_ID_RESTRICT REG_BIT(12) #define SLM_WMTP_RESTORE REG_BIT(11) #define ROW_CHICKEN XE_REG_MCR(0xe4f0, XE_REG_OPTION_MASKED) diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 31192d983d9e195531def2437d37a049795deea7..261d3d6c8a9315e84aa963dc8dec8e4795bbf168 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -1575,7 +1575,7 @@ struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile, return bo; } -static void __xe_bo_unpin_map_no_vm(struct drm_device *drm, void *arg) +static void __xe_bo_unpin_map_no_vm(void *arg) { xe_bo_unpin_map_no_vm(arg); } @@ -1590,7 +1590,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile if (IS_ERR(bo)) return bo; - ret = drmm_add_action_or_reset(&xe->drm, __xe_bo_unpin_map_no_vm, bo); + ret = devm_add_action_or_reset(xe->drm.dev, __xe_bo_unpin_map_no_vm, bo); if (ret) return ERR_PTR(ret); @@ -1638,7 +1638,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str if (IS_ERR(bo)) return PTR_ERR(bo); - drmm_release_action(&xe->drm, __xe_bo_unpin_map_no_vm, *src); + devm_release_action(xe->drm.dev, __xe_bo_unpin_map_no_vm, *src); *src = bo; return 0; diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 76109415eba61e225f8f6ebbfd473bb52ae96e82..c89deffffb6d06019ee17c293c0724546dc66a03 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -54,6 +54,9 @@ #include "xe_vm.h" #include "xe_vram.h" #include "xe_wait_user_fence.h" +#include "xe_wa.h" + +#include static int xe_file_open(struct drm_device *dev, struct drm_file *file) { @@ -87,9 +90,55 @@ static int xe_file_open(struct drm_device *dev, struct drm_file *file) spin_unlock(&xe->clients.lock); file->driver_priv = xef; + kref_init(&xef->refcount); + return 0; } +static void xe_file_destroy(struct kref *ref) +{ + struct xe_file *xef = container_of(ref, struct xe_file, refcount); + struct xe_device *xe = xef->xe; + + xa_destroy(&xef->exec_queue.xa); + mutex_destroy(&xef->exec_queue.lock); + xa_destroy(&xef->vm.xa); + mutex_destroy(&xef->vm.lock); + + spin_lock(&xe->clients.lock); + xe->clients.count--; + spin_unlock(&xe->clients.lock); + + xe_drm_client_put(xef->client); + kfree(xef); +} + +/** + * xe_file_get() - Take a reference to the xe file object + * @xef: Pointer to the xe file + * + * Anyone with a pointer to xef must take a reference to the xe file + * object using this call. + * + * Return: xe file pointer + */ +struct xe_file *xe_file_get(struct xe_file *xef) +{ + kref_get(&xef->refcount); + return xef; +} + +/** + * xe_file_put() - Drop a reference to the xe file object + * @xef: Pointer to the xe file + * + * Used to drop reference to the xef object + */ +void xe_file_put(struct xe_file *xef) +{ + kref_put(&xef->refcount, xe_file_destroy); +} + static void xe_file_close(struct drm_device *dev, struct drm_file *file) { struct xe_device *xe = to_xe_device(dev); @@ -98,6 +147,8 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) struct xe_exec_queue *q; unsigned long idx; + xe_pm_runtime_get(xe); + /* * No need for exec_queue.lock here as there is no contention for it * when FD is closing as IOCTLs presumably can't be modifying the @@ -108,21 +159,14 @@ static void xe_file_close(struct drm_device *dev, struct drm_file *file) xe_exec_queue_kill(q); xe_exec_queue_put(q); } - xa_destroy(&xef->exec_queue.xa); - mutex_destroy(&xef->exec_queue.lock); mutex_lock(&xef->vm.lock); xa_for_each(&xef->vm.xa, idx, vm) xe_vm_close_and_put(vm); mutex_unlock(&xef->vm.lock); - xa_destroy(&xef->vm.xa); - mutex_destroy(&xef->vm.lock); - spin_lock(&xe->clients.lock); - xe->clients.count--; - spin_unlock(&xe->clients.lock); + xe_file_put(xef); - xe_drm_client_put(xef->client); - kfree(xef); + xe_pm_runtime_put(xe); } static const struct drm_ioctl_desc xe_ioctls[] = { @@ -779,6 +823,11 @@ void xe_device_td_flush(struct xe_device *xe) if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20) return; + if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) { + xe_device_l2_flush(xe); + return; + } + for_each_gt(gt, xe, id) { if (xe_gt_is_media_type(gt)) continue; @@ -802,6 +851,30 @@ void xe_device_td_flush(struct xe_device *xe) } } +void xe_device_l2_flush(struct xe_device *xe) +{ + struct xe_gt *gt; + int err; + + gt = xe_root_mmio_gt(xe); + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (err) + return; + + spin_lock(>->global_invl_lock); + xe_mmio_write32(gt, XE2_GLOBAL_INVAL, 0x1); + + if (xe_mmio_wait32(gt, XE2_GLOBAL_INVAL, 0x1, 0x0, 150, NULL, true)) + xe_gt_err_once(gt, "Global invalidation timeout\n"); + spin_unlock(>->global_invl_lock); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size) { return xe_device_has_flat_ccs(xe) ? diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h index bb07f5669dbb3514925adc29f1bc18fbd95ba345..533ccfb2567a2c320699cc1136dfaa4d4a45dce4 100644 --- a/drivers/gpu/drm/xe/xe_device.h +++ b/drivers/gpu/drm/xe/xe_device.h @@ -162,6 +162,7 @@ u64 xe_device_canonicalize_addr(struct xe_device *xe, u64 address); u64 xe_device_uncanonicalize_addr(struct xe_device *xe, u64 address); void xe_device_td_flush(struct xe_device *xe); +void xe_device_l2_flush(struct xe_device *xe); static inline bool xe_device_wedged(struct xe_device *xe) { @@ -170,4 +171,7 @@ static inline bool xe_device_wedged(struct xe_device *xe) void xe_device_declare_wedged(struct xe_device *xe); +struct xe_file *xe_file_get(struct xe_file *xef); +void xe_file_put(struct xe_file *xef); + #endif diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h index 3bca6d344744ac604bb4e50a07fb7ee31c40dbbe..cbc582bcc90a5fb1ee77e3cbd3e02014581dd7b4 100644 --- a/drivers/gpu/drm/xe/xe_device_types.h +++ b/drivers/gpu/drm/xe/xe_device_types.h @@ -566,6 +566,9 @@ struct xe_file { /** @client: drm client */ struct xe_drm_client *client; + + /** @refcount: ref count of this xe file */ + struct kref refcount; }; #endif diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c index 6a26923fa10e0f0e6864d14d3e86992e0a40f75a..7ddd59908334c54ac68ba32048db966871fc5cdb 100644 --- a/drivers/gpu/drm/xe/xe_drm_client.c +++ b/drivers/gpu/drm/xe/xe_drm_client.c @@ -251,11 +251,8 @@ static void show_run_ticks(struct drm_printer *p, struct drm_file *file) /* Accumulate all the exec queues from this client */ mutex_lock(&xef->exec_queue.lock); - xa_for_each(&xef->exec_queue.xa, i, q) { + xa_for_each(&xef->exec_queue.xa, i, q) xe_exec_queue_update_run_ticks(q); - xef->run_ticks[q->class] += q->run_ticks - q->old_run_ticks; - q->old_run_ticks = q->run_ticks; - } mutex_unlock(&xef->exec_queue.lock); /* Get the total GPU cycles */ diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c index 0ba37835849b016975b0796937154040d76fde34..9731dcd0b1bde9cffdcfac70fe4578bd7e640364 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue.c +++ b/drivers/gpu/drm/xe/xe_exec_queue.c @@ -37,6 +37,10 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q) { if (q->vm) xe_vm_put(q->vm); + + if (q->xef) + xe_file_put(q->xef); + kfree(q); } @@ -101,22 +105,35 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe, static int __xe_exec_queue_init(struct xe_exec_queue *q) { + struct xe_vm *vm = q->vm; int i, err; + if (vm) { + err = xe_vm_lock(vm, true); + if (err) + return err; + } + for (i = 0; i < q->width; ++i) { q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K); if (IS_ERR(q->lrc[i])) { err = PTR_ERR(q->lrc[i]); - goto err_lrc; + goto err_unlock; } } + if (vm) + xe_vm_unlock(vm); + err = q->ops->init(q); if (err) goto err_lrc; return 0; +err_unlock: + if (vm) + xe_vm_unlock(vm); err_lrc: for (i = i - 1; i >= 0; --i) xe_lrc_put(q->lrc[i]); @@ -136,15 +153,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v if (IS_ERR(q)) return q; - if (vm) { - err = xe_vm_lock(vm, true); - if (err) - goto err_post_alloc; - } - err = __xe_exec_queue_init(q); - if (vm) - xe_vm_unlock(vm); if (err) goto err_post_alloc; @@ -634,7 +643,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, if (xe_vm_in_preempt_fence_mode(vm)) { q->lr.context = dma_fence_context_alloc(1); - spin_lock_init(&q->lr.lock); err = xe_vm_add_compute_exec_queue(vm, q); if (XE_IOCTL_DBG(xe, err)) @@ -649,6 +657,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data, goto kill_exec_queue; args->exec_queue_id = id; + q->xef = xe_file_get(xef); return 0; @@ -762,6 +771,7 @@ bool xe_exec_queue_is_idle(struct xe_exec_queue *q) */ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) { + struct xe_file *xef; struct xe_lrc *lrc; u32 old_ts, new_ts; @@ -773,6 +783,8 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) if (!q->vm || !q->vm->xef) return; + xef = q->vm->xef; + /* * Only sample the first LRC. For parallel submission, all of them are * scheduled together and we compensate that below by multiplying by @@ -783,7 +795,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q) */ lrc = q->lrc[0]; new_ts = xe_lrc_update_timestamp(lrc, &old_ts); - q->run_ticks += (new_ts - old_ts) * q->width; + xef->run_ticks[q->class] += (new_ts - old_ts) * q->width; } void xe_exec_queue_kill(struct xe_exec_queue *q) diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h index 201588ec33c35955ba6223a960d94794653b6a8d..f6ee0ae80fd63dbe63ba461ec60c029bc5534bb5 100644 --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h @@ -38,6 +38,9 @@ enum xe_exec_queue_priority { * a kernel object. */ struct xe_exec_queue { + /** @xef: Back pointer to xe file if this is user created exec queue */ + struct xe_file *xef; + /** @gt: graphics tile this exec queue can submit to */ struct xe_gt *gt; /** @@ -123,8 +126,6 @@ struct xe_exec_queue { u32 seqno; /** @lr.link: link into VM's list of exec queues */ struct list_head link; - /** @lr.lock: preemption fences lock */ - spinlock_t lock; } lr; /** @ops: submission backend exec queue operations */ @@ -139,10 +140,6 @@ struct xe_exec_queue { * Protected by @vm's resv. Unused if @vm == NULL. */ u64 tlb_flush_seqno; - /** @old_run_ticks: prior hw engine class run time in ticks for this exec queue */ - u64 old_run_ticks; - /** @run_ticks: hw engine class run time in ticks for this exec queue */ - u64 run_ticks; /** @lrc: logical ring context for this exec queue */ struct xe_lrc *lrc[]; }; diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c index f8239a13fa2b882f2fb8245f88abb2c53ad3d9b2..2a612652bb1381940238599424ddd424011859d9 100644 --- a/drivers/gpu/drm/xe/xe_gsc.c +++ b/drivers/gpu/drm/xe/xe_gsc.c @@ -260,7 +260,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) struct xe_tile *tile = gt_to_tile(gt); int ret; - if (XE_WA(gt, 14018094691)) { + if (XE_WA(tile->primary_gt, 14018094691)) { ret = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); /* @@ -278,7 +278,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc) ret = gsc_upload(gsc); - if (XE_WA(gt, 14018094691)) + if (XE_WA(tile->primary_gt, 14018094691)) xe_force_wake_put(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL); if (ret) @@ -437,7 +437,7 @@ out: return ret; } -static void free_resources(struct drm_device *drm, void *arg) +static void free_resources(void *arg) { struct xe_gsc *gsc = arg; @@ -501,7 +501,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc) gsc->q = q; gsc->wq = wq; - err = drmm_add_action_or_reset(&xe->drm, free_resources, gsc); + err = devm_add_action_or_reset(xe->drm.dev, free_resources, gsc); if (err) return err; diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index 31b2e64c70c6abf9bc489eaf41d9f30b997ab766..b9bcbbe27705ffdbf1bb9cba276c478857ded24c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -11,6 +11,8 @@ #include #include +#include + #include "instructions/xe_gfxpipe_commands.h" #include "instructions/xe_mi_commands.h" #include "regs/xe_gt_regs.h" @@ -95,6 +97,51 @@ void xe_gt_sanitize(struct xe_gt *gt) gt->uc.guc.submission_state.enabled = false; } +static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + if (!xe_gt_is_media_type(gt)) { + xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg |= CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + } + + xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3); + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + +static void xe_gt_disable_host_l2_vram(struct xe_gt *gt) +{ + u32 reg; + int err; + + if (!XE_WA(gt, 16023588340)) + return; + + if (xe_gt_is_media_type(gt)) + return; + + err = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT); + if (WARN_ON(err)) + return; + + reg = xe_mmio_read32(gt, XE2_GAMREQSTRM_CTRL); + reg &= ~CG_DIS_CNTLBUS; + xe_mmio_write32(gt, XE2_GAMREQSTRM_CTRL, reg); + + xe_force_wake_put(gt_to_fw(gt), XE_FW_GT); +} + /** * xe_gt_remove() - Clean up the GT structures before driver removal * @gt: the GT object @@ -111,6 +158,8 @@ void xe_gt_remove(struct xe_gt *gt) for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i) xe_hw_fence_irq_finish(>->fence_irq[i]); + + xe_gt_disable_host_l2_vram(gt); } static void gt_reset_worker(struct work_struct *w); @@ -339,6 +388,7 @@ int xe_gt_init_early(struct xe_gt *gt) xe_force_wake_init_gt(gt, gt_to_fw(gt)); xe_pcode_init(gt); + spin_lock_init(>->global_invl_lock); return 0; } @@ -508,6 +558,7 @@ int xe_gt_init_hwconfig(struct xe_gt *gt) xe_gt_mcr_init_early(gt); xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); err = xe_uc_init(>->uc); if (err) @@ -643,6 +694,8 @@ static int do_gt_restart(struct xe_gt *gt) xe_pat_init(gt); + xe_gt_enable_host_l2_vram(gt); + xe_gt_mcr_set_implicit_defaults(gt); xe_reg_sr_apply_mmio(>->reg_sr, gt); @@ -796,6 +849,8 @@ int xe_gt_suspend(struct xe_gt *gt) xe_gt_idle_disable_pg(gt); + xe_gt_disable_host_l2_vram(gt); + XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL)); xe_gt_dbg(gt, "suspended\n"); diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c index 9292d54688684eb93aee178e07c29dbccce340a9..b2a7fa55bd1815ebf41f3beac60b74c03290211e 100644 --- a/drivers/gpu/drm/xe/xe_gt_pagefault.c +++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c @@ -382,6 +382,18 @@ static void pf_queue_work_func(struct work_struct *w) static void acc_queue_work_func(struct work_struct *w); +static void pagefault_fini(void *arg) +{ + struct xe_gt *gt = arg; + struct xe_device *xe = gt_to_xe(gt); + + if (!xe->info.has_usm) + return; + + destroy_workqueue(gt->usm.acc_wq); + destroy_workqueue(gt->usm.pf_wq); +} + int xe_gt_pagefault_init(struct xe_gt *gt) { struct xe_device *xe = gt_to_xe(gt); @@ -409,10 +421,12 @@ int xe_gt_pagefault_init(struct xe_gt *gt) gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue", WQ_UNBOUND | WQ_HIGHPRI, NUM_ACC_QUEUE); - if (!gt->usm.acc_wq) + if (!gt->usm.acc_wq) { + destroy_workqueue(gt->usm.pf_wq); return -ENOMEM; + } - return 0; + return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt); } void xe_gt_pagefault_reset(struct xe_gt *gt) diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c index 4699b78360013c1e16805d30aa9d5d306e43c321..b6f0a7299c030915cc9f556fa3c6a22705c369f7 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c @@ -1927,6 +1927,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) { struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt; struct xe_device *xe = gt_to_xe(gt); + bool is_primary = !xe_gt_is_media_type(gt); bool valid_ggtt, valid_ctxs, valid_dbs; bool valid_any, valid_all; @@ -1935,13 +1936,17 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid) valid_dbs = pf_get_vf_config_dbs(gt, vfid); /* note that GuC doorbells are optional */ - valid_any = valid_ggtt || valid_ctxs || valid_dbs; - valid_all = valid_ggtt && valid_ctxs; + valid_any = valid_ctxs || valid_dbs; + valid_all = valid_ctxs; + + /* and GGTT/LMEM is configured on primary GT only */ + valid_all = valid_all && valid_ggtt; + valid_any = valid_any || (valid_ggtt && is_primary); if (IS_DGFX(xe)) { bool valid_lmem = pf_get_vf_config_ggtt(primary_gt, vfid); - valid_any = valid_any || valid_lmem; + valid_any = valid_any || (valid_lmem && is_primary); valid_all = valid_all && valid_lmem; } diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c index 41e46a00c01e89e28670f9d0518e10e5637fc29d..8892d6c2291ebb95ef95faabcbeafd9a36fbd0d3 100644 --- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c +++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c @@ -850,7 +850,7 @@ static struct vf_runtime_reg *vf_lookup_reg(struct xe_gt *gt, u32 addr) xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt))); - return bsearch(&key, runtime->regs, runtime->regs_size, sizeof(key), + return bsearch(&key, runtime->regs, runtime->num_regs, sizeof(key), vf_runtime_reg_cmp); } diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index d9359976ab8bb6cf6ef25035663036a2fcb37c19..481d83d07367d23df42d7fd97fbae65617bee94e 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -13,10 +13,13 @@ #include "xe_guc.h" #include "xe_guc_ct.h" #include "xe_mmio.h" +#include "xe_pm.h" #include "xe_sriov.h" #include "xe_trace.h" #include "regs/xe_guc_regs.h" +#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS + /* * TLB inval depends on pending commands in the CT queue and then the real * invalidation time. Double up the time to process full CT queue @@ -33,6 +36,24 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void +__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags); + + trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); + xe_gt_tlb_invalidation_fence_fini(fence); + dma_fence_signal(&fence->base); + if (!stack) + dma_fence_put(&fence->base); +} + +static void +invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) +{ + list_del(&fence->link); + __invalidation_fence_signal(xe, fence); +} static void xe_gt_tlb_fence_timeout(struct work_struct *work) { @@ -54,10 +75,8 @@ static void xe_gt_tlb_fence_timeout(struct work_struct *work) xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d", fence->seqno, gt->tlb_invalidation.seqno_recv); - list_del(&fence->link); fence->base.error = -ETIME; - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); + invalidation_fence_signal(xe, fence); } if (!list_empty(>->tlb_invalidation.pending_fences)) queue_delayed_work(system_wq, @@ -87,21 +106,6 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt) return 0; } -static void -__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - trace_xe_gt_tlb_invalidation_fence_signal(xe, fence); - dma_fence_signal(&fence->base); - dma_fence_put(&fence->base); -} - -static void -invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) -{ - list_del(&fence->link); - __invalidation_fence_signal(xe, fence); -} - /** * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset * @gt: graphics tile @@ -111,7 +115,6 @@ invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fe void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) { struct xe_gt_tlb_invalidation_fence *fence, *next; - struct xe_guc *guc = >->uc.guc; int pending_seqno; /* @@ -134,7 +137,6 @@ void xe_gt_tlb_invalidation_reset(struct xe_gt *gt) else pending_seqno = gt->tlb_invalidation.seqno - 1; WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) @@ -165,6 +167,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, int seqno; int ret; + xe_gt_assert(gt, fence); + /* * XXX: The seqno algorithm relies on TLB invalidation being processed * in order which they currently are, if that changes the algorithm will @@ -173,10 +177,8 @@ static int send_tlb_invalidation(struct xe_guc *guc, mutex_lock(&guc->ct.lock); seqno = gt->tlb_invalidation.seqno; - if (fence) { - fence->seqno = seqno; - trace_xe_gt_tlb_invalidation_fence_send(xe, fence); - } + fence->seqno = seqno; + trace_xe_gt_tlb_invalidation_fence_send(xe, fence); action[1] = seqno; ret = xe_guc_ct_send_locked(&guc->ct, action, len, G2H_LEN_DW_TLB_INVALIDATE, 1); @@ -209,7 +211,6 @@ static int send_tlb_invalidation(struct xe_guc *guc, TLB_INVALIDATION_SEQNO_MAX; if (!gt->tlb_invalidation.seqno) gt->tlb_invalidation.seqno = 1; - ret = seqno; } mutex_unlock(&guc->ct.lock); @@ -223,14 +224,16 @@ static int send_tlb_invalidation(struct xe_guc *guc, /** * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC * @gt: graphics tile + * @fence: invalidation fence which will be signal on TLB invalidation + * completion * * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and - * caller can use seqno + xe_gt_tlb_invalidation_wait to wait for completion. + * caller can use the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: 0 on success, negative error code on error */ -static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) +static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence) { u32 action[] = { XE_GUC_ACTION_TLB_INVALIDATION, @@ -238,7 +241,7 @@ static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt) MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC), }; - return send_tlb_invalidation(>->uc.guc, NULL, action, + return send_tlb_invalidation(>->uc.guc, fence, action, ARRAY_SIZE(action)); } @@ -257,13 +260,17 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) if (xe_guc_ct_enabled(>->uc.guc.ct) && gt->uc.guc.submission_state.enabled) { - int seqno; - - seqno = xe_gt_tlb_invalidation_guc(gt); - if (seqno <= 0) - return seqno; + struct xe_gt_tlb_invalidation_fence fence; + int ret; + + xe_gt_tlb_invalidation_fence_init(gt, &fence, true); + ret = xe_gt_tlb_invalidation_guc(gt, &fence); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence); + return ret; + } - xe_gt_tlb_invalidation_wait(gt, seqno); + xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { if (IS_SRIOV_VF(xe)) return 0; @@ -290,18 +297,16 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) * * @gt: graphics tile * @fence: invalidation fence which will be signal on TLB invalidation - * completion, can be NULL + * completion * @start: start address * @end: end address * @asid: address space id * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -312,11 +317,11 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, u32 action[MAX_TLB_INVALIDATION_LEN]; int len = 0; + xe_gt_assert(gt, fence); + /* Execlists not supported */ if (gt_to_xe(gt)->info.force_execlist) { - if (fence) - __invalidation_fence_signal(xe, fence); - + __invalidation_fence_signal(xe, fence); return 0; } @@ -382,12 +387,10 @@ int xe_gt_tlb_invalidation_range(struct xe_gt *gt, * @vma: VMA to invalidate * * Issue a range based TLB invalidation if supported, if not fallback to a full - * TLB invalidation. Completion of TLB is asynchronous and caller can either use - * the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for - * completion. + * TLB invalidation. Completion of TLB is asynchronous and caller can use + * the invalidation fence to wait for completion. * - * Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success, - * negative error code on error. + * Return: Negative error code on error, 0 on success */ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -400,43 +403,6 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, xe_vma_vm(vma)->usm.asid); } -/** - * xe_gt_tlb_invalidation_wait - Wait for TLB to complete - * @gt: graphics tile - * @seqno: seqno to wait which was returned from xe_gt_tlb_invalidation - * - * Wait for tlb_timeout_jiffies() for a TLB invalidation to complete. - * - * Return: 0 on success, -ETIME on TLB invalidation timeout - */ -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno) -{ - struct xe_guc *guc = >->uc.guc; - int ret; - - /* Execlists not supported */ - if (gt_to_xe(gt)->info.force_execlist) - return 0; - - /* - * XXX: See above, this algorithm only works if seqno are always in - * order - */ - ret = wait_event_timeout(guc->ct.wq, - tlb_invalidation_seqno_past(gt, seqno), - tlb_timeout_jiffies(gt)); - if (!ret) { - struct drm_printer p = xe_gt_err_printer(gt); - - xe_gt_err(gt, "TLB invalidation time'd out, seqno=%d, recv=%d\n", - seqno, gt->tlb_invalidation.seqno_recv); - xe_guc_ct_print(&guc->ct, &p, true); - return -ETIME; - } - - return 0; -} - /** * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler * @guc: guc @@ -480,12 +446,7 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } - /* - * wake_up_all() and wait_event_timeout() already have the correct - * barriers. - */ WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]); - wake_up_all(&guc->ct.wq); list_for_each_entry_safe(fence, next, >->tlb_invalidation.pending_fences, link) { @@ -508,3 +469,59 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len) return 0; } + +static const char * +invalidation_fence_get_driver_name(struct dma_fence *dma_fence) +{ + return "xe"; +} + +static const char * +invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) +{ + return "invalidation_fence"; +} + +static const struct dma_fence_ops invalidation_fence_ops = { + .get_driver_name = invalidation_fence_get_driver_name, + .get_timeline_name = invalidation_fence_get_timeline_name, +}; + +/** + * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence + * @gt: GT + * @fence: TLB invalidation fence to initialize + * @stack: fence is stack variable + * + * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini + * must be called if fence is not signaled. + */ +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack) +{ + xe_pm_runtime_get_noresume(gt_to_xe(gt)); + + spin_lock_irq(>->tlb_invalidation.lock); + dma_fence_init(&fence->base, &invalidation_fence_ops, + >->tlb_invalidation.lock, + dma_fence_context_alloc(1), 1); + spin_unlock_irq(>->tlb_invalidation.lock); + INIT_LIST_HEAD(&fence->link); + if (stack) + set_bit(FENCE_STACK_BIT, &fence->base.flags); + else + dma_fence_get(&fence->base); + fence->gt = gt; +} + +/** + * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence + * @fence: TLB invalidation fence to finalize + * + * Drop PM ref which fence took durinig init. + */ +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + xe_pm_runtime_put(gt_to_xe(fence->gt)); +} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index bf3bebd9f985ba6b04fe73d67611e295de665c65..a84065fa324c766ad5c81889333bb738c409684b 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -23,7 +23,17 @@ int xe_gt_tlb_invalidation_vma(struct xe_gt *gt, int xe_gt_tlb_invalidation_range(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, u64 start, u64 end, u32 asid); -int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno); int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); +void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, + struct xe_gt_tlb_invalidation_fence *fence, + bool stack); +void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); + +static inline void +xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) +{ + dma_fence_wait(&fence->base, false); +} + #endif /* _XE_GT_TLB_INVALIDATION_ */ diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h index 934c828efe31cb728d7e751555e9525233f30ae6..de6e825e0851e5097443b0ebec0cbddaa75cb3d6 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h @@ -8,6 +8,8 @@ #include +struct xe_gt; + /** * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence * @@ -17,6 +19,8 @@ struct xe_gt_tlb_invalidation_fence { /** @base: dma fence base */ struct dma_fence base; + /** @gt: GT which fence belong to */ + struct xe_gt *gt; /** @link: link into list of pending tlb fences */ struct list_head link; /** @seqno: seqno of TLB invalidation to signal fence one */ diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 6b5e0b45efb0c929218f988146983630c252fbc9..38a0d0e178c8f77a322c3f2aec9b3232e886afd3 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -362,6 +362,12 @@ struct xe_gt { */ spinlock_t mcr_lock; + /** + * @global_invl_lock: protects the register for the duration + * of a global invalidation of l2 cache + */ + spinlock_t global_invl_lock; + /** @wa_active: keep track of active workarounds */ struct { /** @wa_active.gt: bitmap with active GT workarounds */ diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c index 7d2e937da1d83d5f8606277c1e0cd1a74d0fe7e6..64afc90ad2c51942069efcd5ac078962a11bcce3 100644 --- a/drivers/gpu/drm/xe/xe_guc_ct.c +++ b/drivers/gpu/drm/xe/xe_guc_ct.c @@ -327,6 +327,8 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct, xe_gt_assert(ct_to_gt(ct), ct->g2h_outstanding == 0 || state == XE_GUC_CT_STATE_STOPPED); + if (ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); ct->g2h_outstanding = 0; ct->state = state; @@ -495,10 +497,15 @@ static void h2g_reserve_space(struct xe_guc_ct *ct, u32 cmd_len) static void __g2h_reserve_space(struct xe_guc_ct *ct, u32 g2h_len, u32 num_g2h) { xe_gt_assert(ct_to_gt(ct), g2h_len <= ct->ctbs.g2h.info.space); + xe_gt_assert(ct_to_gt(ct), (!g2h_len && !num_g2h) || + (g2h_len && num_g2h)); if (g2h_len) { lockdep_assert_held(&ct->fast_lock); + if (!ct->g2h_outstanding) + xe_pm_runtime_get_noresume(ct_to_xe(ct)); + ct->ctbs.g2h.info.space -= g2h_len; ct->g2h_outstanding += num_g2h; } @@ -511,7 +518,8 @@ static void __g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) ct->ctbs.g2h.info.size - ct->ctbs.g2h.info.resv_space); ct->ctbs.g2h.info.space += g2h_len; - --ct->g2h_outstanding; + if (!--ct->g2h_outstanding) + xe_pm_runtime_put(ct_to_xe(ct)); } static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 8d7e7f4bbff7168ab02d328067fe517a6bc7a5de..77b0f0d8f7297ee3a3323b57b8ff65627d90b63e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -284,7 +284,7 @@ static void guc_submit_fini(struct drm_device *drm, void *arg) free_submit_wq(guc); } -static void guc_submit_wedged_fini(struct drm_device *drm, void *arg) +static void guc_submit_wedged_fini(void *arg) { struct xe_guc *guc = arg; struct xe_exec_queue *q; @@ -877,7 +877,7 @@ void xe_guc_submit_wedge(struct xe_guc *guc) xe_gt_assert(guc_to_gt(guc), guc_to_xe(guc)->wedged.mode); - err = drmm_add_action_or_reset(&guc_to_xe(guc)->drm, + err = devm_add_action_or_reset(guc_to_xe(guc)->drm.dev, guc_submit_wedged_fini, guc); if (err) { drm_err(&xe->drm, "Failed to register xe_guc_submit clean-up on wedged.mode=2. Although device is wedged.\n"); @@ -1393,6 +1393,8 @@ static void guc_exec_queue_process_msg(struct xe_sched_msg *msg) default: XE_WARN_ON("Unknown message type"); } + + xe_pm_runtime_put(guc_to_xe(exec_queue_to_guc(msg->private_data))); } static const struct drm_sched_backend_ops drm_sched_ops = { @@ -1482,6 +1484,8 @@ static void guc_exec_queue_kill(struct xe_exec_queue *q) static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg *msg, u32 opcode) { + xe_pm_runtime_get_noresume(guc_to_xe(exec_queue_to_guc(q))); + INIT_LIST_HEAD(&msg->link); msg->opcode = opcode; msg->private_data = q; diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c index 45a9789cf50191bdbccbf47b4e9bd5de3f23c92f..0b4f12be3692ab206d623c3403d8c9c69fe4f2c2 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence.c +++ b/drivers/gpu/drm/xe/xe_hw_fence.c @@ -148,20 +148,20 @@ static const char *xe_hw_fence_get_driver_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return dev_name(gt_to_xe(fence->ctx->gt)->drm.dev); + return dev_name(fence->xe->drm.dev); } static const char *xe_hw_fence_get_timeline_name(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - return fence->ctx->name; + return fence->name; } static bool xe_hw_fence_signaled(struct dma_fence *dma_fence) { struct xe_hw_fence *fence = to_xe_hw_fence(dma_fence); - struct xe_device *xe = gt_to_xe(fence->ctx->gt); + struct xe_device *xe = fence->xe; u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32); return dma_fence->error || @@ -253,7 +253,8 @@ void xe_hw_fence_init(struct dma_fence *fence, struct xe_hw_fence_ctx *ctx, struct xe_hw_fence *hw_fence = container_of(fence, typeof(*hw_fence), dma); - hw_fence->ctx = ctx; + hw_fence->xe = gt_to_xe(ctx->gt); + snprintf(hw_fence->name, sizeof(hw_fence->name), "%s", ctx->name); hw_fence->seqno_map = seqno_map; INIT_LIST_HEAD(&hw_fence->irq_link); diff --git a/drivers/gpu/drm/xe/xe_hw_fence_types.h b/drivers/gpu/drm/xe/xe_hw_fence_types.h index b33c4956e8ea0eba0a9f4438ba72577e7e3b69d1..364a61f4bfda98cf6a1111e18f233caad5aeb172 100644 --- a/drivers/gpu/drm/xe/xe_hw_fence_types.h +++ b/drivers/gpu/drm/xe/xe_hw_fence_types.h @@ -12,6 +12,7 @@ #include #include +struct xe_device; struct xe_gt; /** @@ -61,8 +62,10 @@ struct xe_hw_fence_ctx { struct xe_hw_fence { /** @dma: base dma fence for hardware fence context */ struct dma_fence dma; - /** @ctx: hardware fence context */ - struct xe_hw_fence_ctx *ctx; + /** @xe: Xe device for hw fence driver name */ + struct xe_device *xe; + /** @name: name of hardware fence context */ + char name[MAX_FENCE_NAME_LEN]; /** @seqno_map: I/O map for seqno */ struct iosys_map seqno_map; /** @irq_link: Link in struct xe_hw_fence_irq.pending */ diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c index 0c8ce09e5025055f14941eca518fff9f9c87525b..832ea81faeee509811cd0e312045715ba048136f 100644 --- a/drivers/gpu/drm/xe/xe_hwmon.c +++ b/drivers/gpu/drm/xe/xe_hwmon.c @@ -203,9 +203,10 @@ static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long va reg_val = xe_mmio_rmw32(hwmon->gt, rapl_limit, PKG_PWR_LIM_1_EN, 0); reg_val = xe_mmio_read32(hwmon->gt, rapl_limit); if (reg_val & PKG_PWR_LIM_1_EN) { + drm_warn(>_to_xe(hwmon->gt)->drm, "PL1 disable is not supported!\n"); ret = -EOPNOTSUPP; - goto unlock; } + goto unlock; } /* Computation in 64-bits to avoid overflow. Round to nearest. */ diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c index 94ff62e1d95eb1e31f5bfd41a772e8d9662652c1..58121821f0814a35b2947b0417ebdde4b62742c8 100644 --- a/drivers/gpu/drm/xe/xe_lrc.c +++ b/drivers/gpu/drm/xe/xe_lrc.c @@ -1634,6 +1634,9 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) if (!snapshot) return NULL; + if (lrc->bo && lrc->bo->vm) + xe_vm_get(lrc->bo->vm); + snapshot->context_desc = xe_lrc_ggtt_addr(lrc); snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc); snapshot->head = xe_lrc_ring_head(lrc); @@ -1653,12 +1656,14 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc) void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) { struct xe_bo *bo; + struct xe_vm *vm; struct iosys_map src; if (!snapshot) return; bo = snapshot->lrc_bo; + vm = bo->vm; snapshot->lrc_bo = NULL; snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL); @@ -1678,6 +1683,8 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot) xe_bo_unlock(bo); put_bo: xe_bo_put(bo); + if (vm) + xe_vm_put(vm); } void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p) @@ -1727,8 +1734,14 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot) return; kvfree(snapshot->lrc_snapshot); - if (snapshot->lrc_bo) + if (snapshot->lrc_bo) { + struct xe_vm *vm; + + vm = snapshot->lrc_bo->vm; xe_bo_put(snapshot->lrc_bo); + if (vm) + xe_vm_put(vm); + } kfree(snapshot); } diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c index f92faad4b96d3c9aeaf4165c9c3bf630e5a51c0c..aa68cac9fdf8086f71d9fb59f615baa4f423e99a 100644 --- a/drivers/gpu/drm/xe/xe_mmio.c +++ b/drivers/gpu/drm/xe/xe_mmio.c @@ -30,7 +30,8 @@ static void tiles_fini(void *arg) int id; for_each_tile(tile, xe, id) - tile->mmio.regs = NULL; + if (tile != xe_device_get_root_tile(xe)) + tile->mmio.regs = NULL; } int xe_mmio_probe_tiles(struct xe_device *xe) @@ -91,9 +92,11 @@ add_mmio_ext: static void mmio_fini(void *arg) { struct xe_device *xe = arg; + struct xe_tile *root_tile = xe_device_get_root_tile(xe); pci_iounmap(to_pci_dev(xe->drm.dev), xe->mmio.regs); xe->mmio.regs = NULL; + root_tile->mmio.regs = NULL; } int xe_mmio_init(struct xe_device *xe) @@ -121,12 +124,29 @@ int xe_mmio_init(struct xe_device *xe) return devm_add_action_or_reset(xe->drm.dev, mmio_fini, xe); } +static void mmio_flush_pending_writes(struct xe_gt *gt) +{ +#define DUMMY_REG_OFFSET 0x130030 + struct xe_tile *tile = gt_to_tile(gt); + int i; + + if (tile->xe->info.platform != XE_LUNARLAKE) + return; + + /* 4 dummy writes */ + for (i = 0; i < 4; i++) + writel(0, tile->mmio.regs + DUMMY_REG_OFFSET); +} + u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg) { struct xe_tile *tile = gt_to_tile(gt); u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u8 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readb((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -139,6 +159,9 @@ u16 xe_mmio_read16(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u16 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + val = readw((reg.ext ? tile->mmio_ext.regs : tile->mmio.regs) + addr); trace_xe_reg_rw(gt, false, addr, val, sizeof(val)); @@ -160,6 +183,9 @@ u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg) u32 addr = xe_mmio_adjusted_addr(gt, reg.addr); u32 val; + /* Wa_15015404425 */ + mmio_flush_pending_writes(gt); + if (!reg.vf && IS_SRIOV_VF(gt_to_xe(gt))) val = xe_gt_sriov_vf_read32(gt, reg); else diff --git a/drivers/gpu/drm/xe/xe_observation.c b/drivers/gpu/drm/xe/xe_observation.c index fcb584b42a7d511e5fd6656b37acfacfe305f85a..a78c92a44ec2d2ec31757f58ff289bd243872b35 100644 --- a/drivers/gpu/drm/xe/xe_observation.c +++ b/drivers/gpu/drm/xe/xe_observation.c @@ -66,7 +66,6 @@ static struct ctl_table observation_ctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - {} }; /** diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c index 4ee32ee1cc885fc939a81cc41599726b030d845c..722278cc23fc5d7058452a1a3d1b472bf8bbffa4 100644 --- a/drivers/gpu/drm/xe/xe_pat.c +++ b/drivers/gpu/drm/xe/xe_pat.c @@ -7,6 +7,8 @@ #include +#include + #include "regs/xe_reg_defs.h" #include "xe_assert.h" #include "xe_device.h" @@ -15,6 +17,7 @@ #include "xe_gt_mcr.h" #include "xe_mmio.h" #include "xe_sriov.h" +#include "xe_wa.h" #define _PAT_ATS 0x47fc #define _PAT_INDEX(index) _PICK_EVEN_2RANGES(index, 8, \ @@ -382,7 +385,13 @@ void xe_pat_init_early(struct xe_device *xe) if (GRAPHICS_VER(xe) == 20) { xe->pat.ops = &xe2_pat_ops; xe->pat.table = xe2_pat_table; - xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + + /* Wa_16023588340. XXX: Should use XE_WA */ + if (GRAPHICS_VERx100(xe) == 2001) + xe->pat.n_entries = 28; /* Disable CLOS3 */ + else + xe->pat.n_entries = ARRAY_SIZE(xe2_pat_table); + xe->pat.idx[XE_CACHE_NONE] = 3; xe->pat.idx[XE_CACHE_WT] = 15; xe->pat.idx[XE_CACHE_WB] = 2; diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c index de3b5df65e4816989438624e8c68fae2c2020f7d..9a3f618d22dcbf4491ab280e636c81133335ac37 100644 --- a/drivers/gpu/drm/xe/xe_pm.c +++ b/drivers/gpu/drm/xe/xe_pm.c @@ -91,13 +91,13 @@ int xe_pm_suspend(struct xe_device *xe) for_each_gt(gt, xe, id) xe_gt_suspend_prepare(gt); + xe_display_pm_suspend(xe, false); + /* FIXME: Super racey... */ err = xe_bo_evict_all(xe); if (err) goto err; - xe_display_pm_suspend(xe, false); - for_each_gt(gt, xe, id) { err = xe_gt_suspend(gt); if (err) { @@ -151,11 +151,11 @@ int xe_pm_resume(struct xe_device *xe) xe_irq_resume(xe); - xe_display_pm_resume(xe, false); - for_each_gt(gt, xe, id) xe_gt_resume(gt); + xe_display_pm_resume(xe, false); + err = xe_bo_restore_user(xe); if (err) goto err; @@ -363,10 +363,11 @@ int xe_pm_runtime_suspend(struct xe_device *xe) mutex_unlock(&xe->mem_access.vram_userfault.lock); if (xe->d3cold.allowed) { + xe_display_pm_suspend(xe, true); + err = xe_bo_evict_all(xe); if (err) goto out; - xe_display_pm_suspend(xe, true); } for_each_gt(gt, xe, id) { diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c index e8b8ae5c6485e3798d93e3f8aa072e482e850cfb..c453f45328b1c57eac65f32341aee489bc071379 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence.c +++ b/drivers/gpu/drm/xe/xe_preempt_fence.c @@ -128,8 +128,9 @@ xe_preempt_fence_arm(struct xe_preempt_fence *pfence, struct xe_exec_queue *q, { list_del_init(&pfence->link); pfence->q = xe_exec_queue_get(q); + spin_lock_init(&pfence->lock); dma_fence_init(&pfence->base, &preempt_fence_ops, - &q->lr.lock, context, seqno); + &pfence->lock, context, seqno); return &pfence->base; } diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h index b54b5c29b5331e7a0c445abbc10551afa76525fd..312c3372a49f902c3b6a38c1fa26aacb78c1b67b 100644 --- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h +++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h @@ -25,6 +25,8 @@ struct xe_preempt_fence { struct xe_exec_queue *q; /** @preempt_work: work struct which issues preemption */ struct work_struct preempt_work; + /** @lock: dma-fence fence lock */ + spinlock_t lock; /** @error: preempt fence is in error state */ int error; }; diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c index ade9e7a3a0adb4a480a46cbdafbafd5e32990927..31a751a5de3f1f13d956973863e83a8e56058050 100644 --- a/drivers/gpu/drm/xe/xe_pt.c +++ b/drivers/gpu/drm/xe/xe_pt.c @@ -1115,23 +1115,6 @@ struct invalidation_fence { u32 asid; }; -static const char * -invalidation_fence_get_driver_name(struct dma_fence *dma_fence) -{ - return "xe"; -} - -static const char * -invalidation_fence_get_timeline_name(struct dma_fence *dma_fence) -{ - return "invalidation_fence"; -} - -static const struct dma_fence_ops invalidation_fence_ops = { - .get_driver_name = invalidation_fence_get_driver_name, - .get_timeline_name = invalidation_fence_get_timeline_name, -}; - static void invalidation_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb) { @@ -1170,15 +1153,8 @@ static int invalidation_fence_init(struct xe_gt *gt, trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base); - spin_lock_irq(>->tlb_invalidation.lock); - dma_fence_init(&ifence->base.base, &invalidation_fence_ops, - >->tlb_invalidation.lock, - dma_fence_context_alloc(1), 1); - spin_unlock_irq(>->tlb_invalidation.lock); - - INIT_LIST_HEAD(&ifence->base.link); + xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false); - dma_fence_get(&ifence->base.base); /* Ref for caller */ ifence->fence = fence; ifence->gt = gt; ifence->start = start; diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c index 02e28274282f9330b9d27e87d3bde55d01fafc75..5efe83cc82ab5e6eea2668381e0accddf1e343d4 100644 --- a/drivers/gpu/drm/xe/xe_rtp.c +++ b/drivers/gpu/drm/xe/xe_rtp.c @@ -231,7 +231,7 @@ static void rtp_mark_active(struct xe_device *xe, if (first == last) bitmap_set(ctx->active_entries, first, 1); else - bitmap_set(ctx->active_entries, first, last - first + 2); + bitmap_set(ctx->active_entries, first, last - first + 1); } /** diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c index 44d534e362cd39ebc338810c6f6a7318bd1cf75b..9628f9deb3c0122d4d0f1b76619243b7b38718d2 100644 --- a/drivers/gpu/drm/xe/xe_sched_job.c +++ b/drivers/gpu/drm/xe/xe_sched_job.c @@ -171,12 +171,13 @@ void xe_sched_job_destroy(struct kref *ref) struct xe_sched_job *job = container_of(ref, struct xe_sched_job, refcount); struct xe_device *xe = job_to_xe(job); + struct xe_exec_queue *q = job->q; xe_sched_job_free_fences(job); - xe_exec_queue_put(job->q); dma_fence_put(job->fence); drm_sched_job_cleanup(&job->drm); job_free(job); + xe_exec_queue_put(q); xe_pm_runtime_put(xe); } diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index 2883d9aca404e658d6bf182bc2489ae3f9fc8db5..e8d31e0108601981a1ba70434f97e7d80945a342 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -53,14 +53,18 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, u64 value) { struct xe_user_fence *ufence; + u64 __user *ptr = u64_to_user_ptr(addr); + + if (!access_ok(ptr, sizeof(ptr))) + return ERR_PTR(-EFAULT); ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) - return NULL; + return ERR_PTR(-ENOMEM); ufence->xe = xe; kref_init(&ufence->refcount); - ufence->addr = u64_to_user_ptr(addr); + ufence->addr = ptr; ufence->value = value; ufence->mm = current->mm; mmgrab(ufence->mm); @@ -183,8 +187,8 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef, } else { sync->ufence = user_fence_create(xe, sync_in.addr, sync_in.timeline_value); - if (XE_IOCTL_DBG(xe, !sync->ufence)) - return -ENOMEM; + if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence))) + return PTR_ERR(sync->ufence); } break; @@ -263,7 +267,7 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync) if (sync->fence) dma_fence_put(sync->fence); if (sync->chain_fence) - dma_fence_put(&sync->chain_fence->base); + dma_fence_chain_free(sync->chain_fence); if (sync->ufence) user_fence_put(sync->ufence); } diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h index baba14fb1e32e6b511dff637e7330c5221b9bdf7..01837f6f609f52f98663a2cf1bc4b968a1bd42ab 100644 --- a/drivers/gpu/drm/xe/xe_trace.h +++ b/drivers/gpu/drm/xe/xe_trace.h @@ -309,7 +309,7 @@ DECLARE_EVENT_CLASS(xe_hw_fence, TP_ARGS(fence), TP_STRUCT__entry( - __string(dev, __dev_name_gt(fence->ctx->gt)) + __string(dev, __dev_name_xe(fence->xe)) __field(u64, ctx) __field(u32, seqno) __field(struct xe_hw_fence *, fence) diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c86250d3d23106888c215e034f617..423b261ea7430533ba3986df29fba3c7d54dc6f0 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 5b166fa03684e24e05e0309976ee2d378af5dff2..c7561a56abaf203a4b5bda80910e48633bcfbd00 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1601,6 +1601,10 @@ static void vm_destroy_work_func(struct work_struct *w) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + + if (vm->xef) + xe_file_put(vm->xef); + kfree(vm); } @@ -1916,7 +1920,7 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data, } args->vm_id = id; - vm->xef = xef; + vm->xef = xe_file_get(xef); /* Record BO memory for VM pagetable created against client */ for_each_tile(tile, xe, id) @@ -3337,10 +3341,10 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) { struct xe_device *xe = xe_vma_vm(vma)->xe; struct xe_tile *tile; + struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE]; u32 tile_needs_invalidate = 0; - int seqno[XE_MAX_TILES_PER_DEVICE]; u8 id; - int ret; + int ret = 0; xe_assert(xe, !xe_vma_is_null(vma)); trace_xe_vma_invalidate(vma); @@ -3365,29 +3369,33 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) for_each_tile(tile, xe, id) { if (xe_pt_zap_ptes(tile, vma)) { - tile_needs_invalidate |= BIT(id); xe_device_wmb(xe); + xe_gt_tlb_invalidation_fence_init(tile->primary_gt, + &fence[id], true); + /* * FIXME: We potentially need to invalidate multiple * GTs within the tile */ - seqno[id] = xe_gt_tlb_invalidation_vma(tile->primary_gt, NULL, vma); - if (seqno[id] < 0) - return seqno[id]; - } - } + ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, + &fence[id], vma); + if (ret < 0) { + xe_gt_tlb_invalidation_fence_fini(&fence[id]); + goto wait; + } - for_each_tile(tile, xe, id) { - if (tile_needs_invalidate & BIT(id)) { - ret = xe_gt_tlb_invalidation_wait(tile->primary_gt, seqno[id]); - if (ret < 0) - return ret; + tile_needs_invalidate |= BIT(id); } } +wait: + for_each_tile(tile, xe, id) + if (tile_needs_invalidate & BIT(id)) + xe_gt_tlb_invalidation_fence_wait(&fence[id]); + vma->tile_invalidated = vma->tile_mask; - return 0; + return ret; } struct xe_vm_snapshot { diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index c7bf0862b23188a393aca0c8e950c5fcc8a89954..e648265d081be23c907db9c04169ce1759127b62 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -486,6 +486,10 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)), XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, SLM_WMTP_RESTORE)) }, + { XE_RTP_NAME("14021402888"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) + }, /* Xe2_HPG */ @@ -538,6 +542,20 @@ static const struct xe_rtp_entry_sr engine_was[] = { XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)), XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE)) }, + { XE_RTP_NAME("14021821874"), + XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)), + XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT)) + }, + + /* Xe2_LPM */ + + { XE_RTP_NAME("16021639441"), + XE_RTP_RULES(MEDIA_VERSION(2000)), + XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0), + GHWSP_CSB_REPORT_DIS | + PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS, + XE_RTP_ACTION_FLAG(ENGINE_BASE))) + }, /* Xe2_HPM */ diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules index 26066beb4f6f5dc813fc8e4ad58045563dc51c30..08f7336881e32dfd083c328c879f3fbfc48be495 100644 --- a/drivers/gpu/drm/xe/xe_wa_oob.rules +++ b/drivers/gpu/drm/xe/xe_wa_oob.rules @@ -29,3 +29,4 @@ 13011645652 GRAPHICS_VERSION(2004) 22019338487 MEDIA_VERSION(2000) GRAPHICS_VERSION(2001) +16023588340 GRAPHICS_VERSION(2001) diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_client.c b/drivers/hid/amd-sfh-hid/amd_sfh_client.c index bdb578e0899f55bb1b632cd17ef10200b52e3dd9..4b59687ff5d821ebb3791e1288592b6c9f6f159d 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_client.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_client.c @@ -288,12 +288,22 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) mp2_ops->start(privdata, info); cl_data->sensor_sts[i] = amd_sfh_wait_for_response (privdata, cl_data->sensor_idx[i], SENSOR_ENABLED); + + if (cl_data->sensor_sts[i] == SENSOR_ENABLED) + cl_data->is_any_sensor_enabled = true; + } + + if (!cl_data->is_any_sensor_enabled || + (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { + dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", + cl_data->is_any_sensor_enabled); + rc = -EOPNOTSUPP; + goto cleanup; } for (i = 0; i < cl_data->num_hid_devices; i++) { cl_data->cur_hid_dev = i; if (cl_data->sensor_sts[i] == SENSOR_ENABLED) { - cl_data->is_any_sensor_enabled = true; rc = amdtp_hid_probe(i, cl_data); if (rc) goto cleanup; @@ -305,12 +315,6 @@ int amd_sfh_hid_client_init(struct amd_mp2_dev *privdata) cl_data->sensor_sts[i]); } - if (!cl_data->is_any_sensor_enabled || - (mp2_ops->discovery_status && mp2_ops->discovery_status(privdata) == 0)) { - dev_warn(dev, "Failed to discover, sensors not enabled is %d\n", cl_data->is_any_sensor_enabled); - rc = -EOPNOTSUPP; - goto cleanup; - } schedule_delayed_work(&cl_data->work_buffer, msecs_to_jiffies(AMD_SFH_IDLE_LOOP)); return 0; diff --git a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c index 705b523370684551c349872252403467f1d0f67a..81f3024b7b1b51207cb75534afa8cf87cda5530d 100644 --- a/drivers/hid/amd-sfh-hid/amd_sfh_hid.c +++ b/drivers/hid/amd-sfh-hid/amd_sfh_hid.c @@ -171,11 +171,13 @@ err_hid_data: void amdtp_hid_remove(struct amdtp_cl_data *cli_data) { int i; + struct amdtp_hid_data *hid_data; for (i = 0; i < cli_data->num_hid_devices; ++i) { if (cli_data->hid_sensor_hubs[i]) { - kfree(cli_data->hid_sensor_hubs[i]->driver_data); + hid_data = cli_data->hid_sensor_hubs[i]->driver_data; hid_destroy_device(cli_data->hid_sensor_hubs[i]); + kfree(hid_data); cli_data->hid_sensor_hubs[i] = NULL; } } diff --git a/drivers/hid/bpf/Kconfig b/drivers/hid/bpf/Kconfig index 83214bae67687ab497cfd7bc3116c528dcd6a2f6..d65482e02a6c27bf4bc1ab41bf66df2ba0a98041 100644 --- a/drivers/hid/bpf/Kconfig +++ b/drivers/hid/bpf/Kconfig @@ -3,7 +3,7 @@ menu "HID-BPF support" config HID_BPF bool "HID-BPF support" - depends on BPF + depends on BPF_JIT depends on BPF_SYSCALL depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS help diff --git a/drivers/hid/bpf/hid_bpf_struct_ops.c b/drivers/hid/bpf/hid_bpf_struct_ops.c index f59cce6e437f7720f028d988d79b23a5810dfedd..cd696c59ba0f4faec12234cb3154ca09dced5384 100644 --- a/drivers/hid/bpf/hid_bpf_struct_ops.c +++ b/drivers/hid/bpf/hid_bpf_struct_ops.c @@ -183,6 +183,10 @@ static int hid_bpf_reg(void *kdata, struct bpf_link *link) struct hid_device *hdev; int count, err = 0; + /* prevent multiple attach of the same struct_ops */ + if (ops->hdev) + return -EINVAL; + hdev = hid_get_device(ops->hid_id); if (IS_ERR(hdev)) return PTR_ERR(hdev); @@ -248,6 +252,7 @@ static void hid_bpf_unreg(void *kdata, struct bpf_link *link) list_del_rcu(&ops->list); synchronize_srcu(&hdev->bpf.srcu); + ops->hdev = NULL; reconnect = hdev->bpf.rdesc_ops == ops; if (reconnect) diff --git a/drivers/hid/hid-asus.c b/drivers/hid/hid-asus.c index 37e6d25593c211d202ccb8f6996b1cd2818ee69e..a282388b7aa5c1df7d05e53f5712cfb5f6ef6958 100644 --- a/drivers/hid/hid-asus.c +++ b/drivers/hid/hid-asus.c @@ -1248,6 +1248,9 @@ static const struct hid_device_id asus_devices[] = { { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY), QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, + { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, + USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X), + QUIRK_USE_KBD_BACKLIGHT | QUIRK_ROG_NKEY_KEYBOARD }, { HID_USB_DEVICE(USB_VENDOR_ID_ASUSTEK, USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD), QUIRK_ROG_CLAYMORE_II_KEYBOARD }, diff --git a/drivers/hid/hid-cougar.c b/drivers/hid/hid-cougar.c index cb8bd8aae15b511c3ff18388c2bdf5d95e3f4637..0fa785f52707ac22a7e39a22f5a9e643ec734465 100644 --- a/drivers/hid/hid-cougar.c +++ b/drivers/hid/hid-cougar.c @@ -106,7 +106,7 @@ static void cougar_fix_g6_mapping(void) static __u8 *cougar_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int *rsize) { - if (rdesc[2] == 0x09 && rdesc[3] == 0x02 && + if (*rsize >= 117 && rdesc[2] == 0x09 && rdesc[3] == 0x02 && (rdesc[115] | rdesc[116] << 8) >= HID_MAX_USAGES) { hid_info(hdev, "usage count exceeds max: fixing up report descriptor\n"); diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h index 72d56ee7ce1b98c6e5a23b67eb83cd9818db8be3..781c5aa298598a19bf79b22a0e6af0560053846a 100644 --- a/drivers/hid/hid-ids.h +++ b/drivers/hid/hid-ids.h @@ -210,6 +210,7 @@ #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_KEYBOARD3 0x1a30 #define USB_DEVICE_ID_ASUSTEK_ROG_Z13_LIGHTBAR 0x18c6 #define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY 0x1abe +#define USB_DEVICE_ID_ASUSTEK_ROG_NKEY_ALLY_X 0x1b4c #define USB_DEVICE_ID_ASUSTEK_ROG_CLAYMORE_II_KEYBOARD 0x196b #define USB_DEVICE_ID_ASUSTEK_FX503VD_KEYBOARD 0x1869 @@ -520,6 +521,8 @@ #define USB_DEVICE_ID_GENERAL_TOUCH_WIN8_PIT_E100 0xe100 #define I2C_VENDOR_ID_GOODIX 0x27c6 +#define I2C_DEVICE_ID_GOODIX_01E8 0x01e8 +#define I2C_DEVICE_ID_GOODIX_01E9 0x01e9 #define I2C_DEVICE_ID_GOODIX_01F0 0x01f0 #define USB_VENDOR_ID_GOODTOUCH 0x1aad diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 56fc78841f245a9086b8dc73610cb83642b4b7b7..99812c0f830b5e432a4f443c09726680e63e39bb 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -1441,6 +1441,30 @@ static int mt_event(struct hid_device *hid, struct hid_field *field, return 0; } +static __u8 *mt_report_fixup(struct hid_device *hdev, __u8 *rdesc, + unsigned int *size) +{ + if (hdev->vendor == I2C_VENDOR_ID_GOODIX && + (hdev->product == I2C_DEVICE_ID_GOODIX_01E8 || + hdev->product == I2C_DEVICE_ID_GOODIX_01E9)) { + if (rdesc[607] == 0x15) { + rdesc[607] = 0x25; + dev_info( + &hdev->dev, + "GT7868Q report descriptor fixup is applied.\n"); + } else { + dev_info( + &hdev->dev, + "The byte is not expected for fixing the report descriptor. \ +It's possible that the touchpad firmware is not suitable for applying the fix. \ +got: %x\n", + rdesc[607]); + } + } + + return rdesc; +} + static void mt_report(struct hid_device *hid, struct hid_report *report) { struct mt_device *td = hid_get_drvdata(hid); @@ -2035,6 +2059,14 @@ static const struct hid_device_id mt_devices[] = { MT_BT_DEVICE(USB_VENDOR_ID_FRUCTEL, USB_DEVICE_ID_GAMETEL_MT_MODE) }, + /* Goodix GT7868Q devices */ + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + { .driver_data = MT_CLS_WIN_8_FORCE_MULTI_INPUT_NSMU, + HID_DEVICE(BUS_I2C, HID_GROUP_ANY, I2C_VENDOR_ID_GOODIX, + I2C_DEVICE_ID_GOODIX_01E8) }, + /* GoodTouch panels */ { .driver_data = MT_CLS_NSMU, MT_USB_DEVICE(USB_VENDOR_ID_GOODTOUCH, @@ -2270,6 +2302,7 @@ static struct hid_driver mt_driver = { .feature_mapping = mt_feature_mapping, .usage_table = mt_grabbed_usages, .event = mt_event, + .report_fixup = mt_report_fixup, .report = mt_report, .suspend = pm_ptr(mt_suspend), .reset_resume = pm_ptr(mt_reset_resume), diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c index a44367aef62164f1e76eeecd4ee22d2ad61c6a26..2541fa2e0fa3b15aff4f393d314f740cdb597ea4 100644 --- a/drivers/hid/wacom_wac.c +++ b/drivers/hid/wacom_wac.c @@ -692,78 +692,28 @@ static bool wacom_is_art_pen(int tool_id) static int wacom_intuos_get_tool_type(int tool_id) { - int tool_type = BTN_TOOL_PEN; - - if (wacom_is_art_pen(tool_id)) - return tool_type; - switch (tool_id) { case 0x812: /* Inking pen */ case 0x801: /* Intuos3 Inking pen */ case 0x12802: /* Intuos4/5 Inking Pen */ case 0x012: - tool_type = BTN_TOOL_PENCIL; - break; - - case 0x822: /* Pen */ - case 0x842: - case 0x852: - case 0x823: /* Intuos3 Grip Pen */ - case 0x813: /* Intuos3 Classic Pen */ - case 0x802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x8e2: /* IntuosHT2 pen */ - case 0x022: - case 0x200: /* Pro Pen 3 */ - case 0x04200: /* Pro Pen 3 */ - case 0x10842: /* MobileStudio Pro Pro Pen slim */ - case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */ - case 0x16802: /* Cintiq 13HD Pro Pen */ - case 0x18802: /* DTH2242 Pen */ - case 0x10802: /* Intuos4/5 13HD/24HD General Pen */ - case 0x80842: /* Intuos Pro and Cintiq Pro 3D Pen */ - tool_type = BTN_TOOL_PEN; - break; + return BTN_TOOL_PENCIL; case 0x832: /* Stroke pen */ case 0x032: - tool_type = BTN_TOOL_BRUSH; - break; + return BTN_TOOL_BRUSH; case 0x007: /* Mouse 4D and 2D */ case 0x09c: case 0x094: case 0x017: /* Intuos3 2D Mouse */ case 0x806: /* Intuos4 Mouse */ - tool_type = BTN_TOOL_MOUSE; - break; + return BTN_TOOL_MOUSE; case 0x096: /* Lens cursor */ case 0x097: /* Intuos3 Lens cursor */ case 0x006: /* Intuos4 Lens cursor */ - tool_type = BTN_TOOL_LENS; - break; - - case 0x82a: /* Eraser */ - case 0x84a: - case 0x85a: - case 0x91a: - case 0xd1a: - case 0x0fa: - case 0x82b: /* Intuos3 Grip Pen Eraser */ - case 0x81b: /* Intuos3 Classic Pen Eraser */ - case 0x91b: /* Intuos3 Airbrush Eraser */ - case 0x80c: /* Intuos4/5 13HD/24HD Marker Pen Eraser */ - case 0x80a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - case 0x90a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1480a: /* Intuos4/5 13HD/24HD Classic Pen Eraser */ - case 0x1090a: /* Intuos4/5 13HD/24HD Airbrush Eraser */ - case 0x1080c: /* Intuos4/5 13HD/24HD Art Pen Eraser */ - case 0x1084a: /* MobileStudio Pro Pro Pen slim Eraser */ - case 0x1680a: /* Cintiq 13HD Pro Pen Eraser */ - case 0x1880a: /* DTH2242 Eraser */ - case 0x1080a: /* Intuos4/5 13HD/24HD General Pen Eraser */ - tool_type = BTN_TOOL_RUBBER; - break; + return BTN_TOOL_LENS; case 0xd12: case 0x912: @@ -771,10 +721,13 @@ static int wacom_intuos_get_tool_type(int tool_id) case 0x913: /* Intuos3 Airbrush */ case 0x902: /* Intuos4/5 13HD/24HD Airbrush */ case 0x10902: /* Intuos4/5 13HD/24HD Airbrush */ - tool_type = BTN_TOOL_AIRBRUSH; - break; + return BTN_TOOL_AIRBRUSH; + + default: + if (tool_id & 0x0008) + return BTN_TOOL_RUBBER; + return BTN_TOOL_PEN; } - return tool_type; } static void wacom_exit_report(struct wacom_wac *wacom) @@ -1925,12 +1878,14 @@ static void wacom_map_usage(struct input_dev *input, struct hid_usage *usage, int fmax = field->logical_maximum; unsigned int equivalent_usage = wacom_equivalent_usage(usage->hid); int resolution_code = code; - int resolution = hidinput_calc_abs_res(field, resolution_code); + int resolution; if (equivalent_usage == HID_DG_TWIST) { resolution_code = ABS_RZ; } + resolution = hidinput_calc_abs_res(field, resolution_code); + if (equivalent_usage == HID_GD_X) { fmin += features->offset_left; fmax -= features->offset_right; diff --git a/drivers/hwmon/adt7475.c b/drivers/hwmon/adt7475.c index bc186c61a2c0a575118f1c97bf28f39881da5c31..382a2bb9168a50ddde0fe2ecdd72d2d635a4dfce 100644 --- a/drivers/hwmon/adt7475.c +++ b/drivers/hwmon/adt7475.c @@ -22,23 +22,23 @@ #include /* Indexes for the sysfs hooks */ - -#define INPUT 0 -#define MIN 1 -#define MAX 2 -#define CONTROL 3 -#define OFFSET 3 -#define AUTOMIN 4 -#define THERM 5 -#define HYSTERSIS 6 - +enum adt_sysfs_id { + INPUT = 0, + MIN = 1, + MAX = 2, + CONTROL = 3, + OFFSET = 3, // Dup + AUTOMIN = 4, + THERM = 5, + HYSTERSIS = 6, /* * These are unique identifiers for the sysfs functions - unlike the * numbers above, these are not also indexes into an array */ + ALARM = 9, + FAULT = 10, +}; -#define ALARM 9 -#define FAULT 10 /* 7475 Common Registers */ diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 0a8b95ce35f79336251675d7f7bb491e58765b9d..06e836e3e877339233d5c514c63e8b66c09000ba 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -986,12 +986,17 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) return ret; ret = clk_prepare_enable(gi2c->core_clk); - if (ret) + if (ret) { + geni_icc_disable(&gi2c->se); return ret; + } ret = geni_se_resources_on(&gi2c->se); - if (ret) + if (ret) { + clk_disable_unprepare(gi2c->core_clk); + geni_icc_disable(&gi2c->se); return ret; + } enable_irq(gi2c->irq); gi2c->suspended = 0; diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 85b31edc558dff21a54120ea2f8b01e3dd8a3f00..1df5b42041427c87753cd10739805412890cf726 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1802,9 +1802,9 @@ static int tegra_i2c_probe(struct platform_device *pdev) * domain. * * VI I2C device shouldn't be marked as IRQ-safe because VI I2C won't - * be used for atomic transfers. + * be used for atomic transfers. ACPI device is not IRQ safe also. */ - if (!IS_VI(i2c_dev)) + if (!IS_VI(i2c_dev) && !has_acpi_companion(i2c_dev->dev)) pm_runtime_irq_safe(i2c_dev->dev); pm_runtime_enable(i2c_dev->dev); diff --git a/drivers/i2c/i2c-slave-testunit.c b/drivers/i2c/i2c-slave-testunit.c index 4e03b75f9ad7724d2306ee4a85f5acc6e3f9769f..4c550306f3ec5955f553251724a7d3ebe3d8547c 100644 --- a/drivers/i2c/i2c-slave-testunit.c +++ b/drivers/i2c/i2c-slave-testunit.c @@ -18,7 +18,7 @@ enum testunit_cmds { TU_CMD_READ_BYTES = 1, /* save 0 for ABORT, RESET or similar */ - TU_CMD_HOST_NOTIFY, + TU_CMD_SMBUS_HOST_NOTIFY, TU_CMD_SMBUS_BLOCK_PROC_CALL, TU_NUM_CMDS }; @@ -60,7 +60,7 @@ static void i2c_slave_testunit_work(struct work_struct *work) msg.len = tu->regs[TU_REG_DATAH]; break; - case TU_CMD_HOST_NOTIFY: + case TU_CMD_SMBUS_HOST_NOTIFY: msg.addr = 0x08; msg.flags = 0; msg.len = 3; diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 7e4203df83edb543c7a416ad77106b4137e9c6e8..8256f7aed0cf860ae548a52899380d2df0edab5a 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -34,6 +34,7 @@ static int smbus_do_alert(struct device *dev, void *addrp) struct i2c_client *client = i2c_verify_client(dev); struct alert_data *data = addrp; struct i2c_driver *driver; + int ret; if (!client || client->addr != data->addr) return 0; @@ -47,16 +48,47 @@ static int smbus_do_alert(struct device *dev, void *addrp) device_lock(dev); if (client->dev.driver) { driver = to_i2c_driver(client->dev.driver); - if (driver->alert) + if (driver->alert) { + /* Stop iterating after we find the device */ driver->alert(client, data->type, data->data); - else + ret = -EBUSY; + } else { dev_warn(&client->dev, "no driver alert()!\n"); - } else + ret = -EOPNOTSUPP; + } + } else { dev_dbg(&client->dev, "alert with no driver\n"); + ret = -ENODEV; + } + device_unlock(dev); + + return ret; +} + +/* Same as above, but call back all drivers with alert handler */ + +static int smbus_do_alert_force(struct device *dev, void *addrp) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct alert_data *data = addrp; + struct i2c_driver *driver; + + if (!client || (client->flags & I2C_CLIENT_TEN)) + return 0; + + /* + * Drivers should either disable alerts, or provide at least + * a minimal handler. Lock so the driver won't change. + */ + device_lock(dev); + if (client->dev.driver) { + driver = to_i2c_driver(client->dev.driver); + if (driver->alert) + driver->alert(client, data->type, data->data); + } device_unlock(dev); - /* Stop iterating after we find the device */ - return -EBUSY; + return 0; } /* @@ -67,6 +99,7 @@ static irqreturn_t smbus_alert(int irq, void *d) { struct i2c_smbus_alert *alert = d; struct i2c_client *ara; + unsigned short prev_addr = I2C_CLIENT_END; /* Not a valid address */ ara = alert->ara; @@ -94,8 +127,25 @@ static irqreturn_t smbus_alert(int irq, void *d) data.addr, data.data); /* Notify driver for the device which issued the alert */ - device_for_each_child(&ara->adapter->dev, &data, - smbus_do_alert); + status = device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert); + /* + * If we read the same address more than once, and the alert + * was not handled by a driver, it won't do any good to repeat + * the loop because it will never terminate. Try again, this + * time calling the alert handlers of all devices connected to + * the bus, and abort the loop afterwards. If this helps, we + * are all set. If it doesn't, there is nothing else we can do, + * so we might as well abort the loop. + * Note: This assumes that a driver with alert handler handles + * the alert properly and clears it if necessary. + */ + if (data.addr == prev_addr && status != -EBUSY) { + device_for_each_child(&ara->adapter->dev, &data, + smbus_do_alert_force); + break; + } + prev_addr = data.addr; } return IRQ_HANDLED; diff --git a/drivers/input/input-mt.c b/drivers/input/input-mt.c index 14b53dac1253bfed5ad2dbd9b1dc22cdb9b80c28..6b04a674f832a048a3e021f92517d8529d27f652 100644 --- a/drivers/input/input-mt.c +++ b/drivers/input/input-mt.c @@ -46,6 +46,9 @@ int input_mt_init_slots(struct input_dev *dev, unsigned int num_slots, return 0; if (mt) return mt->num_slots != num_slots ? -EINVAL : 0; + /* Arbitrary limit for avoiding too large memory allocation. */ + if (num_slots > 1024) + return -EINVAL; mt = kzalloc(struct_size(mt, slots, num_slots), GFP_KERNEL); if (!mt) diff --git a/drivers/input/joystick/adc-joystick.c b/drivers/input/joystick/adc-joystick.c index 5f46a7104b52a47406865dccd2174954ee60f006..de1fa4cf291b20c02852e1739d16af1d63cccfae 100644 --- a/drivers/input/joystick/adc-joystick.c +++ b/drivers/input/joystick/adc-joystick.c @@ -182,8 +182,11 @@ static int adc_joystick_set_axes(struct device *dev, struct adc_joystick *joy) swap(range[0], range[1]); } - fwnode_property_read_u32(child, "abs-fuzz", &fuzz); - fwnode_property_read_u32(child, "abs-flat", &flat); + if (fwnode_property_read_u32(child, "abs-fuzz", &fuzz)) + fuzz = 0; + + if (fwnode_property_read_u32(child, "abs-flat", &flat)) + flat = 0; input_set_abs_params(joy->input, axes[i].code, range[0], range[1], fuzz, flat); diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c index d23f3225b00ff7bd7c8a1c7d4ce6fd4ae33d128b..445856c9127aaa3fd3be3e4a9bcfd630e1951904 100644 --- a/drivers/input/misc/uinput.c +++ b/drivers/input/misc/uinput.c @@ -417,6 +417,20 @@ static int uinput_validate_absinfo(struct input_dev *dev, unsigned int code, return -EINVAL; } + /* + * Limit number of contacts to a reasonable value (100). This + * ensures that we need less than 2 pages for struct input_mt + * (we are not using in-kernel slot assignment so not going to + * allocate memory for the "red" table), and we should have no + * trouble getting this much memory. + */ + if (code == ABS_MT_SLOT && max > 99) { + printk(KERN_DEBUG + "%s: unreasonably large number of slots requested: %d\n", + UINPUT_NAME, max); + return -EINVAL; + } + return 0; } diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index 38191c3b31bf512d98b88ac0a0736bfcee5590f4..380aa1614442f42bd1c01c7f9bdf5de256088b33 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -189,6 +189,7 @@ static const char * const smbus_pnp_ids[] = { "LEN2054", /* E480 */ "LEN2055", /* E580 */ "LEN2068", /* T14 Gen 1 */ + "SYN3015", /* HP EliteBook 840 G2 */ "SYN3052", /* HP EliteBook 840 G4 */ "SYN3221", /* HP 15-ay000 */ "SYN323d", /* HP Spectre X360 13-w013dx */ diff --git a/drivers/input/serio/i8042-acpipnpio.h b/drivers/input/serio/i8042-acpipnpio.h index 5b50475ec41402bbbf2f4e9d1e3f1bdb3742c192..bad238f69a7afd8f73a101a95eb57b898f9e4b22 100644 --- a/drivers/input/serio/i8042-acpipnpio.h +++ b/drivers/input/serio/i8042-acpipnpio.h @@ -83,6 +83,7 @@ static inline void i8042_write_command(int val) #define SERIO_QUIRK_KBDRESET BIT(12) #define SERIO_QUIRK_DRITEK BIT(13) #define SERIO_QUIRK_NOPNP BIT(14) +#define SERIO_QUIRK_FORCENORESTORE BIT(15) /* Quirk table for different mainboards. Options similar or identical to i8042 * module parameters. @@ -626,6 +627,15 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { }, .driver_data = (void *)(SERIO_QUIRK_NOMUX) }, + { + /* Fujitsu Lifebook E756 */ + /* https://bugzilla.suse.com/show_bug.cgi?id=1229056 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU"), + DMI_MATCH(DMI_PRODUCT_NAME, "LIFEBOOK E756"), + }, + .driver_data = (void *)(SERIO_QUIRK_NOMUX) + }, { /* Fujitsu Lifebook E5411 */ .matches = { @@ -1149,18 +1159,10 @@ static const struct dmi_system_id i8042_dmi_quirk_table[] __initconst = { SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) }, { - /* - * Setting SERIO_QUIRK_NOMUX or SERIO_QUIRK_RESET_ALWAYS makes - * the keyboard very laggy for ~5 seconds after boot and - * sometimes also after resume. - * However both are required for the keyboard to not fail - * completely sometimes after boot or resume. - */ .matches = { DMI_MATCH(DMI_BOARD_NAME, "N150CU"), }, - .driver_data = (void *)(SERIO_QUIRK_NOMUX | SERIO_QUIRK_RESET_ALWAYS | - SERIO_QUIRK_NOLOOP | SERIO_QUIRK_NOPNP) + .driver_data = (void *)(SERIO_QUIRK_FORCENORESTORE) }, { .matches = { @@ -1685,6 +1687,8 @@ static void __init i8042_check_quirks(void) if (quirks & SERIO_QUIRK_NOPNP) i8042_nopnp = true; #endif + if (quirks & SERIO_QUIRK_FORCENORESTORE) + i8042_forcenorestore = true; } #else static inline void i8042_check_quirks(void) {} @@ -1718,7 +1722,7 @@ static int __init i8042_platform_init(void) i8042_check_quirks(); - pr_debug("Active quirks (empty means none):%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + pr_debug("Active quirks (empty means none):%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", i8042_nokbd ? " nokbd" : "", i8042_noaux ? " noaux" : "", i8042_nomux ? " nomux" : "", @@ -1738,10 +1742,11 @@ static int __init i8042_platform_init(void) "", #endif #ifdef CONFIG_PNP - i8042_nopnp ? " nopnp" : ""); + i8042_nopnp ? " nopnp" : "", #else - ""); + "", #endif + i8042_forcenorestore ? " forcenorestore" : ""); retval = i8042_pnp_init(); if (retval) diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c index e0fb1db653b7355705d7fb0f35dba9106e5a674c..8ec4872b447145ba750bb6e6e3b6b5933c05f1ff 100644 --- a/drivers/input/serio/i8042.c +++ b/drivers/input/serio/i8042.c @@ -115,6 +115,10 @@ module_param_named(nopnp, i8042_nopnp, bool, 0); MODULE_PARM_DESC(nopnp, "Do not use PNP to detect controller settings"); #endif +static bool i8042_forcenorestore; +module_param_named(forcenorestore, i8042_forcenorestore, bool, 0); +MODULE_PARM_DESC(forcenorestore, "Force no restore on s3 resume, copying s2idle behaviour"); + #define DEBUG #ifdef DEBUG static bool i8042_debug; @@ -1232,7 +1236,7 @@ static int i8042_pm_suspend(struct device *dev) { int i; - if (pm_suspend_via_firmware()) + if (!i8042_forcenorestore && pm_suspend_via_firmware()) i8042_controller_reset(true); /* Set up serio interrupts for system wakeup. */ @@ -1248,7 +1252,7 @@ static int i8042_pm_suspend(struct device *dev) static int i8042_pm_resume_noirq(struct device *dev) { - if (!pm_resume_via_firmware()) + if (i8042_forcenorestore || !pm_resume_via_firmware()) i8042_interrupt(0, NULL); return 0; @@ -1271,7 +1275,7 @@ static int i8042_pm_resume(struct device *dev) * not restore the controller state to whatever it had been at boot * time, so we do not need to do anything. */ - if (!pm_suspend_via_firmware()) + if (i8042_forcenorestore || !pm_suspend_via_firmware()) return 0; /* diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 4247283c7271d5eedab871f48dbe39fb87a2d9bc..f89c0dd15d8b91a426e967f62122fb025fafa21c 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -824,7 +824,7 @@ static void ads7846_read_state(struct ads7846 *ts) m = &ts->msg[msg_idx]; error = spi_sync(ts->spi, m); if (error) { - dev_err(&ts->spi->dev, "spi_sync --> %d\n", error); + dev_err_ratelimited(&ts->spi->dev, "spi_sync --> %d\n", error); packet->ignore = true; return; } diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c index 7cb26929dc7328930e09be8416ca67aa9c871a7d..9dc25eb2be445f9d8159240bcbcf3d932366bee0 100644 --- a/drivers/input/touchscreen/cyttsp4_core.c +++ b/drivers/input/touchscreen/cyttsp4_core.c @@ -871,7 +871,7 @@ static void cyttsp4_get_mt_touches(struct cyttsp4_mt_data *md, int num_cur_tch) struct cyttsp4_touch tch; int sig; int i, j, t = 0; - int ids[max(CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)]; + int ids[MAX(CY_TMA1036_MAX_TCH, CY_TMA4XX_MAX_TCH)]; memset(ids, 0, si->si_ofs.tch_abs[CY_TCH_T].max * sizeof(int)); for (i = 0; i < num_cur_tch; i++) { diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c index 42f99e57fbb79582700742098991654a5173dca1..e70415f189a5566191717ac585dac2fbaa95afbf 100644 --- a/drivers/input/touchscreen/edt-ft5x06.c +++ b/drivers/input/touchscreen/edt-ft5x06.c @@ -1474,6 +1474,10 @@ static const struct edt_i2c_chip_data edt_ft6236_data = { .max_support_points = 2, }; +static const struct edt_i2c_chip_data edt_ft8201_data = { + .max_support_points = 10, +}; + static const struct edt_i2c_chip_data edt_ft8719_data = { .max_support_points = 10, }; @@ -1485,6 +1489,7 @@ static const struct i2c_device_id edt_ft5x06_ts_id[] = { { .name = "ft5452", .driver_data = (long)&edt_ft5452_data }, /* Note no edt- prefix for compatibility with the ft6236.c driver */ { .name = "ft6236", .driver_data = (long)&edt_ft6236_data }, + { .name = "ft8201", .driver_data = (long)&edt_ft8201_data }, { .name = "ft8719", .driver_data = (long)&edt_ft8719_data }, { /* sentinel */ } }; @@ -1500,6 +1505,7 @@ static const struct of_device_id edt_ft5x06_of_match[] = { { .compatible = "focaltech,ft5452", .data = &edt_ft5452_data }, /* Note focaltech vendor prefix for compatibility with ft6236.c */ { .compatible = "focaltech,ft6236", .data = &edt_ft6236_data }, + { .compatible = "focaltech,ft8201", .data = &edt_ft8201_data }, { .compatible = "focaltech,ft8719", .data = &edt_ft8719_data }, { /* sentinel */ } }; diff --git a/drivers/input/touchscreen/himax_hx83112b.c b/drivers/input/touchscreen/himax_hx83112b.c index 9ed3bccde4ac4a4ce2de00c247dccb57608e0dcf..896a145ddb2bc21c2e7c8447d5b6506e993eb1ad 100644 --- a/drivers/input/touchscreen/himax_hx83112b.c +++ b/drivers/input/touchscreen/himax_hx83112b.c @@ -130,17 +130,6 @@ static int himax_bus_read(struct himax_ts_data *ts, u32 address, void *dst, return 0; } -static int himax_read_mcu(struct himax_ts_data *ts, u32 address, u32 *dst) -{ - int error; - - error = himax_bus_read(ts, address, dst, sizeof(dst)); - if (error) - return error; - - return 0; -} - static void himax_reset(struct himax_ts_data *ts) { gpiod_set_value_cansleep(ts->gpiod_rst, 1); @@ -160,7 +149,8 @@ static int himax_read_product_id(struct himax_ts_data *ts, u32 *product_id) { int error; - error = himax_read_mcu(ts, HIMAX_REG_ADDR_ICID, product_id); + error = himax_bus_read(ts, HIMAX_REG_ADDR_ICID, product_id, + sizeof(*product_id)); if (error) return error; diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index cd679c13752e00c30766fd060ff77b8017934645..81e9cc6e3164a49b3e85a8f96b1d1b2ed6081aaf 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -170,6 +170,7 @@ void iommu_report_device_fault(struct device *dev, struct iopf_fault *evt) report_partial_fault(iopf_param, fault); iopf_put_dev_fault_param(iopf_param); /* A request that is not the last does not need to be ack'd */ + return; } /* diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c index 9a7ec5997c61c5f862a03b3b5fd8cd637e35e14a..3214a4c17c6b3b0883742be3f30e081e6c7f7d32 100644 --- a/drivers/iommu/iommufd/device.c +++ b/drivers/iommu/iommufd/device.c @@ -526,7 +526,7 @@ iommufd_device_do_replace(struct iommufd_device *idev, err_unresv: if (hwpt_is_paging(hwpt)) iommufd_group_remove_reserved_iova(igroup, - to_hwpt_paging(old_hwpt)); + to_hwpt_paging(hwpt)); err_unlock: mutex_unlock(&idev->igroup->lock); return ERR_PTR(rc); diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c index f95e32e2913330d201c0ab6eb792625a538b1797..222cfc11ebfd00bdefdebed0e7eb967a6f42fe9a 100644 --- a/drivers/iommu/iommufd/selftest.c +++ b/drivers/iommu/iommufd/selftest.c @@ -273,7 +273,7 @@ static int mock_domain_read_and_clear_dirty(struct iommu_domain *domain, return 0; } -const struct iommu_dirty_ops dirty_ops = { +static const struct iommu_dirty_ops dirty_ops = { .set_dirty_tracking = mock_domain_set_dirty_tracking, .read_and_clear_dirty = mock_domain_read_and_clear_dirty, }; diff --git a/drivers/irqchip/irq-loongarch-cpu.c b/drivers/irqchip/irq-loongarch-cpu.c index 9d8f2c40604310d6d92fdea667c2ea0322ece015..b35903a06902f73e719626205b0ba765d2319269 100644 --- a/drivers/irqchip/irq-loongarch-cpu.c +++ b/drivers/irqchip/irq-loongarch-cpu.c @@ -18,11 +18,13 @@ struct fwnode_handle *cpuintc_handle; static u32 lpic_gsi_to_irq(u32 gsi) { + int irq = 0; + /* Only pch irqdomain transferring is required for LoongArch. */ if (gsi >= GSI_MIN_PCH_IRQ && gsi <= GSI_MAX_PCH_IRQ) - return acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); + irq = acpi_register_gsi(NULL, gsi, ACPI_LEVEL_SENSITIVE, ACPI_ACTIVE_HIGH); - return 0; + return (irq > 0) ? irq : 0; } static struct fwnode_handle *lpic_get_gsi_domain_id(u32 gsi) diff --git a/drivers/irqchip/irq-mbigen.c b/drivers/irqchip/irq-mbigen.c index 093fd42893a7aa7a06d353eebf7121f9e1382f36..53cc08387588f106fb696050b8212a268e948517 100644 --- a/drivers/irqchip/irq-mbigen.c +++ b/drivers/irqchip/irq-mbigen.c @@ -64,6 +64,20 @@ struct mbigen_device { void __iomem *base; }; +static inline unsigned int get_mbigen_node_offset(unsigned int nid) +{ + unsigned int offset = nid * MBIGEN_NODE_OFFSET; + + /* + * To avoid touched clear register in unexpected way, we need to directly + * skip clear register when access to more than 10 mbigen nodes. + */ + if (nid >= (REG_MBIGEN_CLEAR_OFFSET / MBIGEN_NODE_OFFSET)) + offset += MBIGEN_NODE_OFFSET; + + return offset; +} + static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) { unsigned int nid, pin; @@ -72,8 +86,7 @@ static inline unsigned int get_mbigen_vec_reg(irq_hw_number_t hwirq) nid = hwirq / IRQS_PER_MBIGEN_NODE + 1; pin = hwirq % IRQS_PER_MBIGEN_NODE; - return pin * 4 + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_VEC_OFFSET; + return pin * 4 + get_mbigen_node_offset(nid) + REG_MBIGEN_VEC_OFFSET; } static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, @@ -88,8 +101,7 @@ static inline void get_mbigen_type_reg(irq_hw_number_t hwirq, *mask = 1 << (irq_ofst % 32); ofst = irq_ofst / 32 * 4; - *addr = ofst + nid * MBIGEN_NODE_OFFSET - + REG_MBIGEN_TYPE_OFFSET; + *addr = ofst + get_mbigen_node_offset(nid) + REG_MBIGEN_TYPE_OFFSET; } static inline void get_mbigen_clear_reg(irq_hw_number_t hwirq, diff --git a/drivers/irqchip/irq-meson-gpio.c b/drivers/irqchip/irq-meson-gpio.c index 27e30ce41db3a7d3a1f650cf91076439951b6b41..cd789fa5151909342271f46183aecceb6b007f84 100644 --- a/drivers/irqchip/irq-meson-gpio.c +++ b/drivers/irqchip/irq-meson-gpio.c @@ -178,7 +178,7 @@ struct meson_gpio_irq_controller { void __iomem *base; u32 channel_irqs[MAX_NUM_CHANNEL]; DECLARE_BITMAP(channel_map, MAX_NUM_CHANNEL); - spinlock_t lock; + raw_spinlock_t lock; }; static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, @@ -187,14 +187,14 @@ static void meson_gpio_irq_update_bits(struct meson_gpio_irq_controller *ctl, unsigned long flags; u32 tmp; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); tmp = readl_relaxed(ctl->base + reg); tmp &= ~mask; tmp |= val; writel_relaxed(tmp, ctl->base + reg); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); } static void meson_gpio_irq_init_dummy(struct meson_gpio_irq_controller *ctl) @@ -244,12 +244,12 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, unsigned long flags; unsigned int idx; - spin_lock_irqsave(&ctl->lock, flags); + raw_spin_lock_irqsave(&ctl->lock, flags); /* Find a free channel */ idx = find_first_zero_bit(ctl->channel_map, ctl->params->nr_channels); if (idx >= ctl->params->nr_channels) { - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); pr_err("No channel available\n"); return -ENOSPC; } @@ -257,7 +257,7 @@ meson_gpio_irq_request_channel(struct meson_gpio_irq_controller *ctl, /* Mark the channel as used */ set_bit(idx, ctl->channel_map); - spin_unlock_irqrestore(&ctl->lock, flags); + raw_spin_unlock_irqrestore(&ctl->lock, flags); /* * Setup the mux of the channel to route the signal of the pad @@ -567,7 +567,7 @@ static int meson_gpio_irq_of_init(struct device_node *node, struct device_node * if (!ctl) return -ENOMEM; - spin_lock_init(&ctl->lock); + raw_spin_lock_init(&ctl->lock); ctl->base = of_iomap(node, 0); if (!ctl->base) { diff --git a/drivers/irqchip/irq-pic32-evic.c b/drivers/irqchip/irq-pic32-evic.c index 5d6b8e025bb8769bd0d39cb979f0e1ee6949c07c..eb6ca516a166454eafa80c805d08857020133f21 100644 --- a/drivers/irqchip/irq-pic32-evic.c +++ b/drivers/irqchip/irq-pic32-evic.c @@ -161,9 +161,9 @@ static int pic32_irq_domain_map(struct irq_domain *d, unsigned int virq, return ret; } -int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type) +static int pic32_irq_domain_xlate(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type) { struct evic_chip_data *priv = d->host_data; diff --git a/drivers/irqchip/irq-riscv-aplic-msi.c b/drivers/irqchip/irq-riscv-aplic-msi.c index 028444af48bd57d4dd39bd1391cddac5d14a17a8..d7773f76e5d0a531e4b43332a18b1c4f4d248987 100644 --- a/drivers/irqchip/irq-riscv-aplic-msi.c +++ b/drivers/irqchip/irq-riscv-aplic-msi.c @@ -32,15 +32,10 @@ static void aplic_msi_irq_unmask(struct irq_data *d) aplic_irq_unmask(d); } -static void aplic_msi_irq_eoi(struct irq_data *d) +static void aplic_msi_irq_retrigger_level(struct irq_data *d) { struct aplic_priv *priv = irq_data_get_irq_chip_data(d); - /* - * EOI handling is required only for level-triggered interrupts - * when APLIC is in MSI mode. - */ - switch (irqd_get_trigger_type(d)) { case IRQ_TYPE_LEVEL_LOW: case IRQ_TYPE_LEVEL_HIGH: @@ -59,6 +54,29 @@ static void aplic_msi_irq_eoi(struct irq_data *d) } } +static void aplic_msi_irq_eoi(struct irq_data *d) +{ + /* + * EOI handling is required only for level-triggered interrupts + * when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); +} + +static int aplic_msi_irq_set_type(struct irq_data *d, unsigned int type) +{ + int rc = aplic_irq_set_type(d, type); + + if (rc) + return rc; + /* + * Updating sourcecfg register for level-triggered interrupts + * requires interrupt retriggering when APLIC is in MSI mode. + */ + aplic_msi_irq_retrigger_level(d); + return 0; +} + static void aplic_msi_write_msg(struct irq_data *d, struct msi_msg *msg) { unsigned int group_index, hart_index, guest_index, val; @@ -130,7 +148,7 @@ static const struct msi_domain_template aplic_msi_template = { .name = "APLIC-MSI", .irq_mask = aplic_msi_irq_mask, .irq_unmask = aplic_msi_irq_unmask, - .irq_set_type = aplic_irq_set_type, + .irq_set_type = aplic_msi_irq_set_type, .irq_eoi = aplic_msi_irq_eoi, #ifdef CONFIG_SMP .irq_set_affinity = irq_chip_set_affinity_parent, diff --git a/drivers/irqchip/irq-sun6i-r.c b/drivers/irqchip/irq-sun6i-r.c index a01e440494154ecbf93e07a0fcb5f052ecb478a9..99958d470d6268cb5654a333b34eeb31e7ec37fc 100644 --- a/drivers/irqchip/irq-sun6i-r.c +++ b/drivers/irqchip/irq-sun6i-r.c @@ -270,7 +270,7 @@ static const struct irq_domain_ops sun6i_r_intc_domain_ops = { static int sun6i_r_intc_suspend(void) { - u32 buf[BITS_TO_U32(max(SUN6I_NR_TOP_LEVEL_IRQS, SUN6I_NR_MUX_BITS))]; + u32 buf[BITS_TO_U32(MAX(SUN6I_NR_TOP_LEVEL_IRQS, SUN6I_NR_MUX_BITS))]; int i; /* Wake IRQs are enabled during system sleep and shutdown. */ diff --git a/drivers/irqchip/irq-xilinx-intc.c b/drivers/irqchip/irq-xilinx-intc.c index 238d3d3449496975ec9e1164cc898ac7baf02e54..7e08714d507f4728b1a53423dfc9c5c2fb87188f 100644 --- a/drivers/irqchip/irq-xilinx-intc.c +++ b/drivers/irqchip/irq-xilinx-intc.c @@ -189,7 +189,7 @@ static int __init xilinx_intc_of_init(struct device_node *intc, irqc->intr_mask = 0; } - if (irqc->intr_mask >> irqc->nr_irq) + if ((u64)irqc->intr_mask >> irqc->nr_irq) pr_warn("irq-xilinx: mismatch in kind-of-intr param\n"); pr_info("irq-xilinx: %pOF: num_irq=%d, edge=0x%x\n", diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index c2c07bfa647199139f78522c5c87da3e170e6cee..f299ff393a6a2ceda77bcc566f4d121123d9508c 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1181,8 +1181,26 @@ static int do_resume(struct dm_ioctl *param) suspend_flags &= ~DM_SUSPEND_LOCKFS_FLAG; if (param->flags & DM_NOFLUSH_FLAG) suspend_flags |= DM_SUSPEND_NOFLUSH_FLAG; - if (!dm_suspended_md(md)) - dm_suspend(md, suspend_flags); + if (!dm_suspended_md(md)) { + r = dm_suspend(md, suspend_flags); + if (r) { + down_write(&_hash_lock); + hc = dm_get_mdptr(md); + if (hc && !hc->new_map) { + hc->new_map = new_map; + new_map = NULL; + } else { + r = -ENXIO; + } + up_write(&_hash_lock); + if (new_map) { + dm_sync_table(md); + dm_table_destroy(new_map); + } + dm_put(md); + return r; + } + } old_size = dm_get_size(md); old_map = dm_swap_table(md, new_map); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 97fab2087df86d5b0fba0dc4ce5125499e4b4ca6..87bb903034358234b3112bc9695a9b46403838d7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2737,7 +2737,7 @@ static int dm_wait_for_bios_completion(struct mapped_device *md, unsigned int ta break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } @@ -2762,7 +2762,7 @@ static int dm_wait_for_completion(struct mapped_device *md, unsigned int task_st break; if (signal_pending_state(task_state, current)) { - r = -EINTR; + r = -ERESTARTSYS; break; } diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 04698fd03e6069eb05ddce43aa13cd9f96fdbf23..d48c4fafc779892fbcf66113bb3bce51b98054eb 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -277,7 +277,7 @@ static void sm_metadata_destroy(struct dm_space_map *sm) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); - kfree(smm); + kvfree(smm); } static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) @@ -772,7 +772,7 @@ struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; - smm = kmalloc(sizeof(*smm), GFP_KERNEL); + smm = kvmalloc(sizeof(*smm), GFP_KERNEL); if (!smm) return ERR_PTR(-ENOMEM); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7acfe7c9dc8daa6757f66b287ddc59b5c5614cd4..761989d6790683317102cab48efd20e96adb2f89 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio, return -1; } +static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio) +{ + return !test_bit(In_sync, &rdev->flags) && + rdev->recovery_offset < r1_bio->sector + r1_bio->sectors; +} + static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, int *max_sectors) { @@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio) || test_bit(WriteMostly, &rdev->flags)) continue; @@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio, rdev = conf->mirrors[disk].rdev; if (!rdev || test_bit(Faulty, &rdev->flags) || - !test_bit(WriteMostly, &rdev->flags)) + !test_bit(WriteMostly, &rdev->flags) || + rdev_in_recovery(rdev, r1_bio)) continue; /* there are no bad blocks, we can use this disk */ @@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio) if (!rdev || test_bit(Faulty, &rdev->flags)) return false; - /* still in recovery */ - if (!test_bit(In_sync, &rdev->flags) && - rdev->recovery_offset < r1_bio->sector + r1_bio->sectors) + if (rdev_in_recovery(rdev, r1_bio)) return false; /* don't read from slow disk unless have to */ diff --git a/drivers/media/dvb-frontends/stv0367_priv.h b/drivers/media/dvb-frontends/stv0367_priv.h index 617f605947b2c47e74d95eb3392d61d82ab9619b..7f056d1cce8228d97ce503905b3c673afdf153f8 100644 --- a/drivers/media/dvb-frontends/stv0367_priv.h +++ b/drivers/media/dvb-frontends/stv0367_priv.h @@ -25,8 +25,11 @@ #endif /* MACRO definitions */ +#ifndef MIN #define MAX(X, Y) ((X) >= (Y) ? (X) : (Y)) #define MIN(X, Y) ((X) <= (Y) ? (X) : (Y)) +#endif + #define INRANGE(X, Y, Z) \ ((((X) <= (Y)) && ((Y) <= (Z))) || \ (((Z) <= (Y)) && ((Y) <= (X))) ? 1 : 0) diff --git a/drivers/media/pci/intel/ipu6/Kconfig b/drivers/media/pci/intel/ipu6/Kconfig index 154343080c82a64106f0fca20cdf54cb9c2a21f4..40e20f0aa5ae5d164c8c29fc06c8cd11acc57fc0 100644 --- a/drivers/media/pci/intel/ipu6/Kconfig +++ b/drivers/media/pci/intel/ipu6/Kconfig @@ -3,13 +3,14 @@ config VIDEO_INTEL_IPU6 depends on ACPI || COMPILE_TEST depends on VIDEO_DEV depends on X86 && X86_64 && HAS_DMA + depends on IPU_BRIDGE || !IPU_BRIDGE + select AUXILIARY_BUS select DMA_OPS select IOMMU_IOVA select VIDEO_V4L2_SUBDEV_API select MEDIA_CONTROLLER select VIDEOBUF2_DMA_CONTIG select V4L2_FWNODE - select IPU_BRIDGE help This is the 6th Gen Intel Image Processing Unit, found in Intel SoCs and used for capturing images and video from camera sensors. diff --git a/drivers/media/usb/dvb-usb/dvb-usb-init.c b/drivers/media/usb/dvb-usb/dvb-usb-init.c index 22d83ac18eb7353279b5a5e6e5ba8a8c8f53d924..fbf58012becdf2a40716d68bfc1c17136f423709 100644 --- a/drivers/media/usb/dvb-usb/dvb-usb-init.c +++ b/drivers/media/usb/dvb-usb/dvb-usb-init.c @@ -23,40 +23,11 @@ static int dvb_usb_force_pid_filter_usage; module_param_named(force_pid_filter_usage, dvb_usb_force_pid_filter_usage, int, 0444); MODULE_PARM_DESC(force_pid_filter_usage, "force all dvb-usb-devices to use a PID filter, if any (default: 0)."); -static int dvb_usb_check_bulk_endpoint(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - int ret; - - ret = usb_pipe_type_check(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - ret = usb_pipe_type_check(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - if (ret) - return ret; - } - return 0; -} - -static void dvb_usb_clear_halt(struct dvb_usb_device *d, u8 endpoint) -{ - if (endpoint) { - usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, endpoint)); - usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, endpoint)); - } -} - static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) { struct dvb_usb_adapter *adap; int ret, n, o; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint); - if (ret) - return ret; - ret = dvb_usb_check_bulk_endpoint(d, d->props.generic_bulk_ctrl_endpoint_response); - if (ret) - return ret; for (n = 0; n < d->props.num_adapters; n++) { adap = &d->adapter[n]; adap->dev = d; @@ -132,8 +103,10 @@ static int dvb_usb_adapter_init(struct dvb_usb_device *d, short *adapter_nrs) * when reloading the driver w/o replugging the device * sometimes a timeout occurs, this helps */ - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint); - dvb_usb_clear_halt(d, d->props.generic_bulk_ctrl_endpoint_response); + if (d->props.generic_bulk_ctrl_endpoint != 0) { + usb_clear_halt(d->udev, usb_sndbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + usb_clear_halt(d->udev, usb_rcvbulkpipe(d->udev, d->props.generic_bulk_ctrl_endpoint)); + } return 0; diff --git a/drivers/media/usb/uvc/uvc_ctrl.c b/drivers/media/usb/uvc/uvc_ctrl.c index 0136df5732bac5d6600f035641e07689e14782cd..4fe26e82e3d1c167bd59c5decf79a8391ae465a4 100644 --- a/drivers/media/usb/uvc/uvc_ctrl.c +++ b/drivers/media/usb/uvc/uvc_ctrl.c @@ -2680,6 +2680,10 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, for (i = 0; i < ARRAY_SIZE(uvc_ctrl_mappings); ++i) { const struct uvc_control_mapping *mapping = &uvc_ctrl_mappings[i]; + if (!uvc_entity_match_guid(ctrl->entity, mapping->entity) || + ctrl->info.selector != mapping->selector) + continue; + /* Let the device provide a custom mapping. */ if (mapping->filter_mapping) { mapping = mapping->filter_mapping(chain, ctrl); @@ -2687,9 +2691,7 @@ static void uvc_ctrl_init_ctrl(struct uvc_video_chain *chain, continue; } - if (uvc_entity_match_guid(ctrl->entity, mapping->entity) && - ctrl->info.selector == mapping->selector) - __uvc_ctrl_add_mapping(chain, ctrl, mapping); + __uvc_ctrl_add_mapping(chain, ctrl, mapping); } } diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 41c3d2821a78505ce74eca2012700368c7381305..41c54051347ab7fc3bb8691595b35bdb67a7d40e 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -587,7 +587,7 @@ config NSM config MARVELL_CN10K_DPI tristate "Octeon CN10K DPI driver" - depends on PCI + depends on PCI && PCI_IOV depends on ARCH_THUNDER || (COMPILE_TEST && 64BIT) help Enables Octeon CN10K DMA packet interface (DPI) driver which diff --git a/drivers/misc/eeprom/ee1004.c b/drivers/misc/eeprom/ee1004.c index d4aeeb2b2169204183ddfeeafcbb58691222b24b..89224d4af4a201b0e6b7a4437cc90eb95c6e7b11 100644 --- a/drivers/misc/eeprom/ee1004.c +++ b/drivers/misc/eeprom/ee1004.c @@ -233,6 +233,49 @@ static void ee1004_cleanup_bus_data(void *data) mutex_unlock(&ee1004_bus_lock); } +static int ee1004_init_bus_data(struct i2c_client *client) +{ + struct ee1004_bus_data *bd; + int err, cnr = 0; + + bd = ee1004_get_bus_data(client->adapter); + if (!bd) + return dev_err_probe(&client->dev, -ENOSPC, "Only %d busses supported", + EE1004_MAX_BUSSES); + + i2c_set_clientdata(client, bd); + + if (++bd->dev_count == 1) { + /* Use 2 dummy devices for page select command */ + for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { + struct i2c_client *cl; + + cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); + if (IS_ERR(cl)) { + err = PTR_ERR(cl); + goto err_out; + } + + bd->set_page[cnr] = cl; + } + + /* Remember current page to avoid unneeded page select */ + err = ee1004_get_current_page(bd); + if (err < 0) + goto err_out; + + dev_dbg(&client->dev, "Currently selected page: %d\n", err); + bd->current_page = err; + } + + return 0; + +err_out: + ee1004_cleanup(cnr, bd); + + return err; +} + static int ee1004_probe(struct i2c_client *client) { struct nvmem_config config = { @@ -251,9 +294,8 @@ static int ee1004_probe(struct i2c_client *client) .compat = true, .base_dev = &client->dev, }; - struct ee1004_bus_data *bd; struct nvmem_device *ndev; - int err, cnr = 0; + int err; /* Make sure we can operate on this adapter */ if (!i2c_check_functionality(client->adapter, @@ -264,46 +306,21 @@ static int ee1004_probe(struct i2c_client *client) mutex_lock(&ee1004_bus_lock); - bd = ee1004_get_bus_data(client->adapter); - if (!bd) { + err = ee1004_init_bus_data(client); + if (err < 0) { mutex_unlock(&ee1004_bus_lock); - return dev_err_probe(&client->dev, -ENOSPC, - "Only %d busses supported", EE1004_MAX_BUSSES); - } - - err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, bd); - if (err < 0) return err; - - i2c_set_clientdata(client, bd); - - if (++bd->dev_count == 1) { - /* Use 2 dummy devices for page select command */ - for (cnr = 0; cnr < EE1004_NUM_PAGES; cnr++) { - struct i2c_client *cl; - - cl = i2c_new_dummy_device(client->adapter, EE1004_ADDR_SET_PAGE + cnr); - if (IS_ERR(cl)) { - mutex_unlock(&ee1004_bus_lock); - return PTR_ERR(cl); - } - bd->set_page[cnr] = cl; - } - - /* Remember current page to avoid unneeded page select */ - err = ee1004_get_current_page(bd); - if (err < 0) { - mutex_unlock(&ee1004_bus_lock); - return err; - } - dev_dbg(&client->dev, "Currently selected page: %d\n", err); - bd->current_page = err; } ee1004_probe_temp_sensor(client); mutex_unlock(&ee1004_bus_lock); + err = devm_add_action_or_reset(&client->dev, ee1004_cleanup_bus_data, + i2c_get_clientdata(client)); + if (err < 0) + return err; + ndev = devm_nvmem_register(&client->dev, &config); if (IS_ERR(ndev)) return PTR_ERR(ndev); diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c index 5204fda51da3fc497b62c51de6c7d74987339f40..339d126414d4bf842d08c593e15bd6a3954bbf38 100644 --- a/drivers/misc/fastrpc.c +++ b/drivers/misc/fastrpc.c @@ -2085,16 +2085,6 @@ err_invoke: return err; } -static int is_attach_rejected(struct fastrpc_user *fl) -{ - /* Check if the device node is non-secure */ - if (!fl->is_secure_dev) { - dev_dbg(&fl->cctx->rpdev->dev, "untrusted app trying to attach to privileged DSP PD\n"); - return -EACCES; - } - return 0; -} - static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { @@ -2107,19 +2097,13 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd, err = fastrpc_invoke(fl, argp); break; case FASTRPC_IOCTL_INIT_ATTACH: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, ROOT_PD); + err = fastrpc_init_attach(fl, ROOT_PD); break; case FASTRPC_IOCTL_INIT_ATTACH_SNS: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_attach(fl, SENSORS_PD); + err = fastrpc_init_attach(fl, SENSORS_PD); break; case FASTRPC_IOCTL_INIT_CREATE_STATIC: - err = is_attach_rejected(fl); - if (!err) - err = fastrpc_init_create_static_process(fl, argp); + err = fastrpc_init_create_static_process(fl, argp); break; case FASTRPC_IOCTL_INIT_CREATE: err = fastrpc_init_create_process(fl, argp); diff --git a/drivers/misc/lkdtm/refcount.c b/drivers/misc/lkdtm/refcount.c index 5cd488f54cfa542ec3370030b0afcda2d75335c5..8f744bee6fbde83f196d71f0ce33447a0baeaeb5 100644 --- a/drivers/misc/lkdtm/refcount.c +++ b/drivers/misc/lkdtm/refcount.c @@ -182,6 +182,21 @@ static void lkdtm_REFCOUNT_SUB_AND_TEST_NEGATIVE(void) check_negative(&neg, 3); } +/* + * A refcount_sub_and_test() by zero when the counter is at zero should act like + * refcount_sub_and_test() above when going negative. + */ +static void lkdtm_REFCOUNT_SUB_AND_TEST_ZERO(void) +{ + refcount_t neg = REFCOUNT_INIT(0); + + pr_info("attempting bad refcount_sub_and_test() at zero\n"); + if (refcount_sub_and_test(0, &neg)) + pr_warn("Weird: refcount_sub_and_test() reported zero\n"); + + check_negative(&neg, 0); +} + static void check_from_zero(refcount_t *ref) { switch (refcount_read(ref)) { @@ -400,6 +415,7 @@ static struct crashtype crashtypes[] = { CRASHTYPE(REFCOUNT_DEC_NEGATIVE), CRASHTYPE(REFCOUNT_DEC_AND_TEST_NEGATIVE), CRASHTYPE(REFCOUNT_SUB_AND_TEST_NEGATIVE), + CRASHTYPE(REFCOUNT_SUB_AND_TEST_ZERO), CRASHTYPE(REFCOUNT_INC_ZERO), CRASHTYPE(REFCOUNT_ADD_ZERO), CRASHTYPE(REFCOUNT_INC_SATURATED), diff --git a/drivers/mmc/core/mmc_test.c b/drivers/mmc/core/mmc_test.c index 8f7f587a0025b36142bcaa1a3b9f5bf4eba4fd2b..b7f627a9fdeab91b5d69b16e77352ca331394e6d 100644 --- a/drivers/mmc/core/mmc_test.c +++ b/drivers/mmc/core/mmc_test.c @@ -3125,13 +3125,13 @@ static ssize_t mtf_test_write(struct file *file, const char __user *buf, test->buffer = kzalloc(BUFFER_SIZE, GFP_KERNEL); #ifdef CONFIG_HIGHMEM test->highmem = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM, BUFFER_ORDER); + if (!test->highmem) { + count = -ENOMEM; + goto free_test_buffer; + } #endif -#ifdef CONFIG_HIGHMEM - if (test->buffer && test->highmem) { -#else if (test->buffer) { -#endif mutex_lock(&mmc_test_lock); mmc_test_run(test, testcase); mutex_unlock(&mmc_test_lock); @@ -3139,6 +3139,7 @@ static ssize_t mtf_test_write(struct file *file, const char __user *buf, #ifdef CONFIG_HIGHMEM __free_pages(test->highmem, BUFFER_ORDER); +free_test_buffer: #endif kfree(test->buffer); kfree(test); diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 2333ef4893ee0da0b63ad2b37ebf8b7ccb330f29..e9f6e4e622901a0e1a12544899613c04e7cc5791 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -3299,6 +3299,10 @@ int dw_mci_probe(struct dw_mci *host) host->biu_clk = devm_clk_get(host->dev, "biu"); if (IS_ERR(host->biu_clk)) { dev_dbg(host->dev, "biu clock not available\n"); + ret = PTR_ERR(host->biu_clk); + if (ret == -EPROBE_DEFER) + return ret; + } else { ret = clk_prepare_enable(host->biu_clk); if (ret) { @@ -3310,6 +3314,10 @@ int dw_mci_probe(struct dw_mci *host) host->ciu_clk = devm_clk_get(host->dev, "ciu"); if (IS_ERR(host->ciu_clk)) { dev_dbg(host->dev, "ciu clock not available\n"); + ret = PTR_ERR(host->ciu_clk); + if (ret == -EPROBE_DEFER) + goto err_clk_biu; + host->bus_hz = host->pdata->bus_hz; } else { ret = clk_prepare_enable(host->ciu_clk); diff --git a/drivers/mmc/host/mtk-sd.c b/drivers/mmc/host/mtk-sd.c index a94835b8ab9394305b50f89c4ea8d46813e57399..e386f78e326790165701f8c4a42e858700a60bfc 100644 --- a/drivers/mmc/host/mtk-sd.c +++ b/drivers/mmc/host/mtk-sd.c @@ -1230,7 +1230,7 @@ static bool msdc_cmd_done(struct msdc_host *host, int events, } if (!sbc_error && !(events & MSDC_INT_CMDRDY)) { - if (events & MSDC_INT_CMDTMO || + if ((events & MSDC_INT_CMDTMO && !host->hs400_tuning) || (!mmc_op_tuning(cmd->opcode) && !host->hs400_tuning)) /* * should not clear fifo/interrupt as the tune data @@ -1323,9 +1323,9 @@ static void msdc_start_command(struct msdc_host *host, static void msdc_cmd_next(struct msdc_host *host, struct mmc_request *mrq, struct mmc_command *cmd) { - if ((cmd->error && - !(cmd->error == -EILSEQ && - (mmc_op_tuning(cmd->opcode) || host->hs400_tuning))) || + if ((cmd->error && !host->hs400_tuning && + !(cmd->error == -EILSEQ && + mmc_op_tuning(cmd->opcode))) || (mrq->sbc && mrq->sbc->error)) msdc_request_done(host, mrq); else if (cmd == mrq->sbc) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 1cd92c12e7824c4d2cf3ab55a729428e280e6ab2..f74bacf071fca0d3c09d02e1d5e52bbbc7d632c2 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -582,7 +582,6 @@ static void bond_ipsec_del_sa_all(struct bonding *bond) } else { slave->dev->xfrmdev_ops->xdo_dev_state_delete(ipsec->xs); } - ipsec->xs->xso.real_dev = NULL; } spin_unlock_bh(&bond->ipsec_lock); rcu_read_unlock(); @@ -599,34 +598,30 @@ static bool bond_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *xs) struct net_device *real_dev; struct slave *curr_active; struct bonding *bond; - int err; + bool ok = false; bond = netdev_priv(bond_dev); rcu_read_lock(); curr_active = rcu_dereference(bond->curr_active_slave); + if (!curr_active) + goto out; real_dev = curr_active->dev; - if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) { - err = false; + if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) goto out; - } - if (!xs->xso.real_dev) { - err = false; + if (!xs->xso.real_dev) goto out; - } if (!real_dev->xfrmdev_ops || !real_dev->xfrmdev_ops->xdo_dev_offload_ok || - netif_is_bond_master(real_dev)) { - err = false; + netif_is_bond_master(real_dev)) goto out; - } - err = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); + ok = real_dev->xfrmdev_ops->xdo_dev_offload_ok(skb, xs); out: rcu_read_unlock(); - return err; + return ok; } static const struct xfrmdev_ops bond_xfrmdev_ops = { diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index bc80fb6397dcd09aa8b544f08804a0a5f1f4705c..95d59a18c0223fbd956d977482797ee4942d67e3 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -936,7 +936,7 @@ static int bond_option_active_slave_set(struct bonding *bond, /* check to see if we are clearing active */ if (!slave_dev) { netdev_dbg(bond->dev, "Clearing current active slave\n"); - RCU_INIT_POINTER(bond->curr_active_slave, NULL); + bond_change_active_slave(bond, NULL); bond_select_active_slave(bond); } else { struct slave *old_active = rtnl_dereference(bond->curr_active_slave); diff --git a/drivers/net/can/usb/etas_es58x/es58x_devlink.c b/drivers/net/can/usb/etas_es58x/es58x_devlink.c index 635edeb8f68cdf5e33132027331fb77ea66dedbf..eee20839d96fd4142d43e26ddf86941051d7bb5a 100644 --- a/drivers/net/can/usb/etas_es58x/es58x_devlink.c +++ b/drivers/net/can/usb/etas_es58x/es58x_devlink.c @@ -215,7 +215,7 @@ static int es58x_devlink_info_get(struct devlink *devlink, struct es58x_sw_version *fw_ver = &es58x_dev->firmware_version; struct es58x_sw_version *bl_ver = &es58x_dev->bootloader_version; struct es58x_hw_revision *hw_rev = &es58x_dev->hardware_revision; - char buf[max(sizeof("xx.xx.xx"), sizeof("axxx/xxx"))]; + char buf[MAX(sizeof("xx.xx.xx"), sizeof("axxx/xxx"))]; int ret = 0; if (es58x_sw_version_is_valid(fw_ver)) { diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index ed1e6560df25ec270d1716672f42f6861e06c443..0e663ec0c12a3bcbd865f29b13596dae3b6a6a30 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -675,8 +675,10 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) of_remove_property(child, prop); phydev = of_phy_find_device(child); - if (phydev) + if (phydev) { phy_device_remove(phydev); + phy_device_free(phydev); + } } err = mdiobus_register(priv->user_mii_bus); diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c index b074b4bb062964850042634732438682f43e192d..1491099528be8d3dac61612f980f69c698c1beab 100644 --- a/drivers/net/dsa/microchip/ksz_common.c +++ b/drivers/net/dsa/microchip/ksz_common.c @@ -2578,7 +2578,11 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) if (!port) return MICREL_KSZ8_P1_ERRATA; break; + case KSZ8567_CHIP_ID: case KSZ9477_CHIP_ID: + case KSZ9567_CHIP_ID: + case KSZ9896_CHIP_ID: + case KSZ9897_CHIP_ID: /* KSZ9477 Errata DS80000754C * * Module 4: Energy Efficient Ethernet (EEE) feature select must @@ -2588,6 +2592,13 @@ static u32 ksz_get_phy_flags(struct dsa_switch *ds, int port) * controls. If not disabled, the PHY ports can auto-negotiate * to enable EEE, and this feature can cause link drops when * linked to another device supporting EEE. + * + * The same item appears in the errata for the KSZ9567, KSZ9896, + * and KSZ9897. + * + * A similar item appears in the errata for the KSZ8567, but + * provides an alternative workaround. For now, use the simple + * workaround of disabling the EEE feature for this device too. */ return MICREL_NO_EEE; } @@ -3764,6 +3775,11 @@ static int ksz_port_set_mac_address(struct dsa_switch *ds, int port, return -EBUSY; } + /* Need to initialize variable as the code to fill in settings may + * not be executed. + */ + wol.wolopts = 0; + ksz_get_wol(ds, dp->index, &wol); if (wol.wolopts & WAKE_MAGIC) { dev_err(ds->dev, diff --git a/drivers/net/dsa/microchip/ksz_ptp.c b/drivers/net/dsa/microchip/ksz_ptp.c index f0bd46e5d4ec0f64031caf544cca0931ebcf9ae5..050f17c43ef60de962ecd34d8171960346ffcd48 100644 --- a/drivers/net/dsa/microchip/ksz_ptp.c +++ b/drivers/net/dsa/microchip/ksz_ptp.c @@ -266,7 +266,6 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) struct ksz_port *prt; struct dsa_port *dp; bool tag_en = false; - int ret; dsa_switch_for_each_user_port(dp, dev->ds) { prt = &dev->ports[dp->index]; @@ -277,9 +276,7 @@ static int ksz_ptp_enable_mode(struct ksz_device *dev) } if (tag_en) { - ret = ptp_schedule_worker(ptp_data->clock, 0); - if (ret) - return ret; + ptp_schedule_worker(ptp_data->clock, 0); } else { ptp_cancel_worker_sync(ptp_data->clock); } diff --git a/drivers/net/dsa/mv88e6xxx/global1_atu.c b/drivers/net/dsa/mv88e6xxx/global1_atu.c index ce3b3690c3c057235d78b1c701d97aad26f24b00..c47f068f56b32afe71c6da4fe6700e1881b583d9 100644 --- a/drivers/net/dsa/mv88e6xxx/global1_atu.c +++ b/drivers/net/dsa/mv88e6xxx/global1_atu.c @@ -457,7 +457,8 @@ static irqreturn_t mv88e6xxx_g1_atu_prob_irq_thread_fn(int irq, void *dev_id) trace_mv88e6xxx_atu_full_violation(chip->dev, spid, entry.portvec, entry.mac, fid); - chip->ports[spid].atu_full_violation++; + if (spid < ARRAY_SIZE(chip->ports)) + chip->ports[spid].atu_full_violation++; } return IRQ_HANDLED; diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index e554699f06d4191208c498fac335f2dce98f4872..4a705f7333f43b240654bba259ece102388b95d2 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -61,11 +61,46 @@ static int felix_cpu_port_for_conduit(struct dsa_switch *ds, return cpu_dp->index; } +/** + * felix_update_tag_8021q_rx_rule - Update VCAP ES0 tag_8021q rule after + * vlan_filtering change + * @outer_tagging_rule: Pointer to VCAP filter on which the update is performed + * @vlan_filtering: Current bridge VLAN filtering setting + * + * Source port identification for tag_8021q is done using VCAP ES0 rules on the + * CPU port(s). The ES0 tag B (inner tag from the packet) can be configured as + * either: + * - push_inner_tag=0: the inner tag is never pushed into the frame + * (and we lose info about the classified VLAN). This is + * good when the classified VLAN is a discardable quantity + * for the software RX path: it is either set to + * OCELOT_STANDALONE_PVID, or to + * ocelot_vlan_unaware_pvid(bridge). + * - push_inner_tag=1: the inner tag is always pushed. This is good when the + * classified VLAN is not a discardable quantity (the port + * is under a VLAN-aware bridge, and software needs to + * continue processing the packet in the same VLAN as the + * hardware). + * The point is that what is good for a VLAN-unaware port is not good for a + * VLAN-aware port, and vice versa. Thus, the RX tagging rules must be kept in + * sync with the VLAN filtering state of the port. + */ +static void +felix_update_tag_8021q_rx_rule(struct ocelot_vcap_filter *outer_tagging_rule, + bool vlan_filtering) +{ + if (vlan_filtering) + outer_tagging_rule->action.push_inner_tag = OCELOT_ES0_TAG; + else + outer_tagging_rule->action.push_inner_tag = OCELOT_NO_ES0_TAG; +} + /* Set up VCAP ES0 rules for pushing a tag_8021q VLAN towards the CPU such that * the tagger can perform RX source port identification. */ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, - int upstream, u16 vid) + int upstream, u16 vid, + bool vlan_filtering) { struct ocelot_vcap_filter *outer_tagging_rule; struct ocelot *ocelot = ds->priv; @@ -96,6 +131,14 @@ static int felix_tag_8021q_vlan_add_rx(struct dsa_switch *ds, int port, outer_tagging_rule->action.tag_a_tpid_sel = OCELOT_TAG_TPID_SEL_8021AD; outer_tagging_rule->action.tag_a_vid_sel = 1; outer_tagging_rule->action.vid_a_val = vid; + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + outer_tagging_rule->action.tag_b_tpid_sel = OCELOT_TAG_TPID_SEL_8021Q; + /* Leave TAG_B_VID_SEL at 0 (Classified VID + VID_B_VAL). Since we also + * leave VID_B_VAL at 0, this makes ES0 tag B (the inner tag) equal to + * the classified VID, which we need to see in the DSA tagger's receive + * path. Note: the inner tag is only visible in the packet when pushed + * (push_inner_tag == OCELOT_ES0_TAG). + */ err = ocelot_vcap_filter_add(ocelot, outer_tagging_rule, NULL); if (err) @@ -227,6 +270,7 @@ static int felix_tag_8021q_vlan_del_tx(struct dsa_switch *ds, int port, u16 vid) static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, u16 flags) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; @@ -234,11 +278,12 @@ static int felix_tag_8021q_vlan_add(struct dsa_switch *ds, int port, u16 vid, * membership, which we aren't. So we don't need to add any VCAP filter * for the CPU port. */ - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { - err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + err = felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); if (err) return err; } @@ -258,10 +303,11 @@ add_tx_failed: static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) { + struct dsa_port *dp = dsa_to_port(ds, port); struct dsa_port *cpu_dp; int err; - if (!dsa_is_user_port(ds, port)) + if (!dsa_port_is_user(dp)) return 0; dsa_switch_for_each_cpu_port(cpu_dp, ds) { @@ -278,11 +324,41 @@ static int felix_tag_8021q_vlan_del(struct dsa_switch *ds, int port, u16 vid) del_tx_failed: dsa_switch_for_each_cpu_port(cpu_dp, ds) - felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid); + felix_tag_8021q_vlan_add_rx(ds, port, cpu_dp->index, vid, + dsa_port_is_vlan_filtering(dp)); return err; } +static int felix_update_tag_8021q_rx_rules(struct dsa_switch *ds, int port, + bool vlan_filtering) +{ + struct ocelot_vcap_filter *outer_tagging_rule; + struct ocelot_vcap_block *block_vcap_es0; + struct ocelot *ocelot = ds->priv; + struct dsa_port *cpu_dp; + unsigned long cookie; + int err; + + block_vcap_es0 = &ocelot->block[VCAP_ES0]; + + dsa_switch_for_each_cpu_port(cpu_dp, ds) { + cookie = OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, + cpu_dp->index); + + outer_tagging_rule = ocelot_vcap_block_find_filter_by_id(block_vcap_es0, + cookie, false); + + felix_update_tag_8021q_rx_rule(outer_tagging_rule, vlan_filtering); + + err = ocelot_vcap_filter_replace(ocelot, outer_tagging_rule); + if (err) + return err; + } + + return 0; +} + static int felix_trap_get_cpu_port(struct dsa_switch *ds, const struct ocelot_vcap_filter *trap) { @@ -528,7 +604,19 @@ static int felix_tag_8021q_setup(struct dsa_switch *ds) * so we need to be careful that there are no extra frames to be * dequeued over MMIO, since we would never know to discard them. */ + ocelot_lock_xtr_grp_bh(ocelot, 0); ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp_bh(ocelot, 0); + + /* Problem: when using push_inner_tag=1 for ES0 tag B, we lose info + * about whether the received packets were VLAN-tagged on the wire, + * since they are always tagged on egress towards the CPU port. + * + * Since using push_inner_tag=1 is unavoidable for VLAN-aware bridges, + * we must work around the fallout by untagging in software to make + * untagged reception work more or less as expected. + */ + ds->untag_vlan_aware_bridge_pvid = true; return 0; } @@ -554,6 +642,8 @@ static void felix_tag_8021q_teardown(struct dsa_switch *ds) ocelot_port_teardown_dsa_8021q_cpu(ocelot, dp->index); dsa_tag_8021q_unregister(ds); + + ds->untag_vlan_aware_bridge_pvid = false; } static unsigned long felix_tag_8021q_get_host_fwd_mask(struct dsa_switch *ds) @@ -1008,8 +1098,23 @@ static int felix_vlan_filtering(struct dsa_switch *ds, int port, bool enabled, struct netlink_ext_ack *extack) { struct ocelot *ocelot = ds->priv; + bool using_tag_8021q; + struct felix *felix; + int err; - return ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + err = ocelot_port_vlan_filtering(ocelot, port, enabled, extack); + if (err) + return err; + + felix = ocelot_to_felix(ocelot); + using_tag_8021q = felix->tag_proto == DSA_TAG_PROTO_OCELOT_8021Q; + if (using_tag_8021q) { + err = felix_update_tag_8021q_rx_rules(ds, port, enabled); + if (err) + return err; + } + + return 0; } static int felix_vlan_add(struct dsa_switch *ds, int port, @@ -1518,6 +1623,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) int port = xmit_work->dp->index; int retries = 10; + ocelot_lock_inj_grp(ocelot, 0); + do { if (ocelot_can_inject(ocelot, 0)) break; @@ -1526,6 +1633,7 @@ static void felix_port_deferred_xmit(struct kthread_work *work) } while (--retries); if (!retries) { + ocelot_unlock_inj_grp(ocelot, 0); dev_err(ocelot->dev, "port %d failed to inject skb\n", port); ocelot_port_purge_txtstamp_skb(ocelot, port, skb); @@ -1535,6 +1643,8 @@ static void felix_port_deferred_xmit(struct kthread_work *work) ocelot_port_inject_frame(ocelot, port, 0, rew_op, skb); + ocelot_unlock_inj_grp(ocelot, 0); + consume_skb(skb); kfree(xmit_work); } @@ -1694,6 +1804,8 @@ static bool felix_check_xtr_pkt(struct ocelot *ocelot) if (!felix->info->quirk_no_xtr_irq) return false; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; unsigned int type; @@ -1730,6 +1842,8 @@ out: ocelot_drain_cpu_queue(ocelot, 0); } + ocelot_unlock_xtr_grp(ocelot, grp); + return true; } diff --git a/drivers/net/dsa/vitesse-vsc73xx-core.c b/drivers/net/dsa/vitesse-vsc73xx-core.c index d9d3e30fd47ad4d357224be4024a7afe9940a795..e3f95d2cc2c1689557f463edcdea4f4fb559f6a2 100644 --- a/drivers/net/dsa/vitesse-vsc73xx-core.c +++ b/drivers/net/dsa/vitesse-vsc73xx-core.c @@ -40,6 +40,10 @@ #define VSC73XX_BLOCK_ARBITER 0x5 /* Only subblock 0 */ #define VSC73XX_BLOCK_SYSTEM 0x7 /* Only subblock 0 */ +/* MII Block subblock */ +#define VSC73XX_BLOCK_MII_INTERNAL 0x0 /* Internal MDIO subblock */ +#define VSC73XX_BLOCK_MII_EXTERNAL 0x1 /* External MDIO subblock */ + #define CPU_PORT 6 /* CPU port */ /* MAC Block registers */ @@ -225,6 +229,8 @@ #define VSC73XX_MII_CMD 0x1 #define VSC73XX_MII_DATA 0x2 +#define VSC73XX_MII_STAT_BUSY BIT(3) + /* Arbiter block 5 registers */ #define VSC73XX_ARBEMPTY 0x0c #define VSC73XX_ARBDISC 0x0e @@ -299,6 +305,7 @@ #define IS_739X(a) (IS_7395(a) || IS_7398(a)) #define VSC73XX_POLL_SLEEP_US 1000 +#define VSC73XX_MDIO_POLL_SLEEP_US 5 #define VSC73XX_POLL_TIMEOUT_US 10000 struct vsc73xx_counter { @@ -527,6 +534,22 @@ static int vsc73xx_detect(struct vsc73xx *vsc) return 0; } +static int vsc73xx_mdio_busy_check(struct vsc73xx *vsc) +{ + int ret, err; + u32 val; + + ret = read_poll_timeout(vsc73xx_read, err, + err < 0 || !(val & VSC73XX_MII_STAT_BUSY), + VSC73XX_MDIO_POLL_SLEEP_US, + VSC73XX_POLL_TIMEOUT_US, false, vsc, + VSC73XX_BLOCK_MII, VSC73XX_BLOCK_MII_INTERNAL, + VSC73XX_MII_STAT, &val); + if (ret) + return ret; + return err; +} + static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) { struct vsc73xx *vsc = ds->priv; @@ -534,12 +557,20 @@ static int vsc73xx_phy_read(struct dsa_switch *ds, int phy, int regnum) u32 val; int ret; + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + /* Setting bit 26 means "read" */ cmd = BIT(26) | (phy << 21) | (regnum << 16); ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; - msleep(2); + + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; + ret = vsc73xx_read(vsc, VSC73XX_BLOCK_MII, 0, 2, &val); if (ret) return ret; @@ -563,18 +594,11 @@ static int vsc73xx_phy_write(struct dsa_switch *ds, int phy, int regnum, u32 cmd; int ret; - /* It was found through tedious experiments that this router - * chip really hates to have it's PHYs reset. They - * never recover if that happens: autonegotiation stops - * working after a reset. Just filter out this command. - * (Resetting the whole chip is OK.) - */ - if (regnum == 0 && (val & BIT(15))) { - dev_info(vsc->dev, "reset PHY - disallowed\n"); - return 0; - } + ret = vsc73xx_mdio_busy_check(vsc); + if (ret) + return ret; - cmd = (phy << 21) | (regnum << 16); + cmd = (phy << 21) | (regnum << 16) | val; ret = vsc73xx_write(vsc, VSC73XX_BLOCK_MII, 0, 1, cmd); if (ret) return ret; @@ -957,6 +981,11 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, if (duplex == DUPLEX_FULL) val |= VSC73XX_MAC_CFG_FDX; + else + /* In datasheet description ("Port Mode Procedure" in 5.6.2) + * this bit is configured only for half duplex. + */ + val |= VSC73XX_MAC_CFG_WEXC_DIS; /* This routine is described in the datasheet (below ARBDISC register * description) @@ -967,7 +996,6 @@ static void vsc73xx_mac_link_up(struct phylink_config *config, get_random_bytes(&seed, 1); val |= seed << VSC73XX_MAC_CFG_SEED_OFFSET; val |= VSC73XX_MAC_CFG_SEED_LOAD; - val |= VSC73XX_MAC_CFG_WEXC_DIS; /* Those bits are responsible for MTU only. Kernel takes care about MTU, * let's enable +8 bytes frame length unconditionally. diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index ffa74c26ee5325526870c36b39212cc3a7b327cd..04a623b3eee29e2ddd58ac536d81d40107738d63 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -5056,7 +5056,7 @@ void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr) list_del_init(&fltr->list); } -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) +static void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all) { struct bnxt_filter_base *usr_fltr, *tmp; @@ -7591,19 +7591,20 @@ static bool bnxt_need_reserve_rings(struct bnxt *bp) int rx = bp->rx_nr_rings, stat; int vnic, grp = rx; - if (hw_resc->resv_tx_rings != bp->tx_nr_rings && - bp->hwrm_spec_code >= 0x10601) - return true; - /* Old firmware does not need RX ring reservations but we still * need to setup a default RSS map when needed. With new firmware * we go through RX ring reservations first and then set up the * RSS map for the successfully reserved RX rings when needed. */ - if (!BNXT_NEW_RM(bp)) { + if (!BNXT_NEW_RM(bp)) bnxt_check_rss_tbl_no_rmgr(bp); + + if (hw_resc->resv_tx_rings != bp->tx_nr_rings && + bp->hwrm_spec_code >= 0x10601) + return true; + + if (!BNXT_NEW_RM(bp)) return false; - } vnic = bnxt_get_total_vnics(bp, rx); @@ -7649,8 +7650,8 @@ static int bnxt_get_avail_msix(struct bnxt *bp, int num); static int __bnxt_reserve_rings(struct bnxt *bp) { struct bnxt_hw_rings hwr = {0}; + int rx_rings, old_rx_rings, rc; int cp = bp->cp_nr_rings; - int rx_rings, rc; int ulp_msix = 0; bool sh = false; int tx_cp; @@ -7684,6 +7685,7 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.grp = bp->rx_nr_rings; hwr.rss_ctx = bnxt_get_total_rss_ctxs(bp, &hwr); hwr.stat = bnxt_get_func_stat_ctxs(bp); + old_rx_rings = bp->hw_resc.resv_rx_rings; rc = bnxt_hwrm_reserve_rings(bp, &hwr); if (rc) @@ -7738,7 +7740,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) if (!bnxt_rings_ok(bp, &hwr)) return -ENOMEM; - if (!netif_is_rxfh_configured(bp->dev)) + if (old_rx_rings != bp->hw_resc.resv_rx_rings && + !netif_is_rxfh_configured(bp->dev)) bnxt_set_dflt_rss_indir_tbl(bp, NULL); if (!bnxt_ulp_registered(bp->edev) && BNXT_NEW_RM(bp)) { @@ -10245,7 +10248,7 @@ static void bnxt_hwrm_realloc_rss_ctx_vnic(struct bnxt *bp) } } -void bnxt_clear_rss_ctxs(struct bnxt *bp) +static void bnxt_clear_rss_ctxs(struct bnxt *bp) { struct ethtool_rxfh_context *ctx; unsigned long context; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 6bbdc718c3a701ccc26b716f2680e89ad5ad0c18..059a6f81c1a871e91d5237b60cfb526c8ffc7c56 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -2790,7 +2790,6 @@ void bnxt_set_ring_params(struct bnxt *); int bnxt_set_rx_skb_mode(struct bnxt *bp, bool page_mode); void bnxt_insert_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); void bnxt_del_one_usr_fltr(struct bnxt *bp, struct bnxt_filter_base *fltr); -void bnxt_clear_usr_fltrs(struct bnxt *bp, bool all); int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp, unsigned long *bmap, int bmap_size, bool async_only); int bnxt_hwrm_func_drv_unrgtr(struct bnxt *bp); @@ -2842,7 +2841,6 @@ int bnxt_hwrm_vnic_rss_cfg_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); int __bnxt_setup_vnic_p5(struct bnxt *bp, struct bnxt_vnic_info *vnic); void bnxt_del_one_rss_ctx(struct bnxt *bp, struct bnxt_rss_ctx *rss_ctx, bool all); -void bnxt_clear_rss_ctxs(struct bnxt *bp); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_half_open_nic(struct bnxt *bp); void bnxt_half_close_nic(struct bnxt *bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index d00ef0063820d74483f2c088f014d1f499baf77c..4cf9bf8b01b09d8218943a01bc9d76e0b6d7e5a3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -968,9 +968,6 @@ static int bnxt_set_channels(struct net_device *dev, return -EINVAL; } - bnxt_clear_usr_fltrs(bp, true); - if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) - bnxt_clear_rss_ctxs(bp); if (netif_running(dev)) { if (BNXT_PF(bp)) { /* TODO CHIMP_FW: Send message to all VF's @@ -1863,8 +1860,14 @@ static void bnxt_modify_rss(struct bnxt *bp, struct ethtool_rxfh_context *ctx, } static int bnxt_rxfh_context_check(struct bnxt *bp, + const struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack) { + if (rxfh->hfunc && rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "RSS hash function not supported"); + return -EOPNOTSUPP; + } + if (!BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) { NL_SET_ERR_MSG_MOD(extack, "RSS contexts not supported"); return -EOPNOTSUPP; @@ -1888,7 +1891,7 @@ static int bnxt_create_rxfh_context(struct net_device *dev, struct bnxt_vnic_info *vnic; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -1915,8 +1918,12 @@ static int bnxt_create_rxfh_context(struct net_device *dev, if (rc) goto out; + /* Populate defaults in the context */ bnxt_set_dflt_rss_indir_tbl(bp, ctx); + ctx->hfunc = ETH_RSS_HASH_TOP; memcpy(vnic->rss_hash_key, bp->rss_hash_key, HW_HASH_KEY_SIZE); + memcpy(ethtool_rxfh_context_key(ctx), + bp->rss_hash_key, HW_HASH_KEY_SIZE); rc = bnxt_hwrm_vnic_alloc(bp, vnic, 0, bp->rx_nr_rings); if (rc) { @@ -1953,7 +1960,7 @@ static int bnxt_modify_rxfh_context(struct net_device *dev, struct bnxt_rss_ctx *rss_ctx; int rc; - rc = bnxt_rxfh_context_check(bp, extack); + rc = bnxt_rxfh_context_check(bp, rxfh, extack); if (rc) return rc; @@ -1990,7 +1997,6 @@ static int bnxt_set_rxfh(struct net_device *dev, bnxt_modify_rss(bp, NULL, NULL, rxfh); - bnxt_clear_usr_fltrs(bp, false); if (netif_running(bp->dev)) { bnxt_close_nic(bp, false, false); rc = bnxt_open_nic(bp, false, false); @@ -5280,7 +5286,7 @@ void bnxt_ethtool_free(struct bnxt *bp) const struct ethtool_ops bnxt_ethtool_ops = { .cap_link_lanes_supported = 1, .cap_rss_ctx_supported = 1, - .rxfh_max_context_id = BNXT_MAX_ETH_RSS_CTX, + .rxfh_max_num_contexts = BNXT_MAX_ETH_RSS_CTX + 1, .rxfh_indir_space = BNXT_MAX_RSS_TABLE_ENTRIES_P5, .rxfh_priv_size = sizeof(struct bnxt_rss_ctx), .supported_coalesce_params = ETHTOOL_COALESCE_USECS | diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 345681d5007e35cb42fceb9b0a039d710872190b..f88b641533fcc55d3ec11e497bb3a7a0d39c0575 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -297,11 +297,6 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons, * redirect is coming from a frame received by the * bnxt_en driver. */ - rx_buf = &rxr->rx_buf_ring[cons]; - mapping = rx_buf->mapping - bp->rx_dma_offset; - dma_unmap_page_attrs(&pdev->dev, mapping, - BNXT_RX_PAGE_SIZE, bp->rx_dir, - DMA_ATTR_WEAK_ORDERING); /* if we are unable to allocate a new buffer, abort and reuse */ if (bnxt_alloc_rx_data(bp, rxr, rxr->rx_prod, GFP_ATOMIC)) { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 1248792d7fd4d2f98847889c499277a92229d785..0715ea5bf13ed9df75eeb4f50e99ada8cef631e9 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -42,19 +42,15 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; - if (dev->phydev) { + if (dev->phydev) phy_ethtool_get_wol(dev->phydev, wol); - if (wol->supported) - return; - } - if (!device_can_wakeup(kdev)) { - wol->supported = 0; - wol->wolopts = 0; + /* MAC is not wake-up capable, return what the PHY does */ + if (!device_can_wakeup(kdev)) return; - } - wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; + /* Overlay MAC capabilities with that of the PHY queried before */ + wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; wol->wolopts = priv->wolopts; memset(wol->sopass, 0, sizeof(wol->sopass)); diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 11665be3a22c7a352578d608ee093a3f2aea7b95..dcd3f54ed0cf009ea0813c00ed2a6faf4a5f2399 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -5250,8 +5250,8 @@ static int __maybe_unused macb_suspend(struct device *dev) if (bp->wol & MACB_WOL_ENABLED) { /* Check for IP address in WOL ARP mode */ idev = __in_dev_get_rcu(bp->dev); - if (idev && idev->ifa_list) - ifa = rcu_access_pointer(idev->ifa_list); + if (idev) + ifa = rcu_dereference(idev->ifa_list); if ((bp->wolopts & WAKE_ARP) && !ifa) { netdev_err(netdev, "IP address not assigned as required by WoL walk ARP\n"); return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index a40c266c37f207c1966375058b8117c81f2629fe..608cc6af5af1c7b01e68babfa190f453b7b1cdcc 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1054,18 +1054,12 @@ static int phy_interface_mode(u8 lmac_type) static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) { - struct lmac *lmac, **priv; + struct lmac *lmac; u64 cfg; lmac = &bgx->lmac[lmacid]; lmac->bgx = bgx; - lmac->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); - if (!lmac->netdev) - return -ENOMEM; - priv = netdev_priv(lmac->netdev); - *priv = lmac; - if ((lmac->lmac_type == BGX_MODE_SGMII) || (lmac->lmac_type == BGX_MODE_QSGMII) || (lmac->lmac_type == BGX_MODE_RGMII)) { @@ -1191,7 +1185,6 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); - free_netdev(lmac->netdev); lmac->phydev = NULL; } @@ -1653,6 +1646,23 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) bgx_get_qlm_mode(bgx); + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + struct lmac *lmacp, **priv; + + lmacp = &bgx->lmac[lmac]; + lmacp->netdev = alloc_netdev_dummy(sizeof(struct lmac *)); + + if (!lmacp->netdev) { + for (int i = 0; i < lmac; i++) + free_netdev(bgx->lmac[i].netdev); + err = -ENOMEM; + goto err_enable; + } + + priv = netdev_priv(lmacp->netdev); + *priv = lmacp; + } + err = bgx_init_phy(bgx); if (err) goto err_enable; @@ -1692,8 +1702,10 @@ static void bgx_remove(struct pci_dev *pdev) u8 lmac; /* Disable all LMACs */ - for (lmac = 0; lmac < bgx->lmac_count; lmac++) + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { bgx_lmac_disable(bgx, lmac); + free_netdev(bgx->lmac[lmac].netdev); + } pci_free_irq(pdev, GMPX_GMI_TX_INT, bgx); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index 786ceae3448875666135a34549a062b6809fc503..dd9e68465e697835d57e422f1c129d942629e545 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -1244,7 +1244,8 @@ static u64 hash_filter_ntuple(struct ch_filter_specification *fs, * in the Compressed Filter Tuple. */ if (tp->vlan_shift >= 0 && fs->mask.ivlan) - ntuple |= (FT_VLAN_VLD_F | fs->val.ivlan) << tp->vlan_shift; + ntuple |= (u64)(FT_VLAN_VLD_F | + fs->val.ivlan) << tp->vlan_shift; if (tp->port_shift >= 0 && fs->mask.iport) ntuple |= (u64)fs->val.iport << tp->port_shift; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index a71f848adc0545f25e6bada7b274d0d7affbe66c..a293b08f36d46dfde7e25412951da78c15e2dfd6 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2638,13 +2638,14 @@ static int dpaa2_switch_refill_bp(struct ethsw_core *ethsw) static int dpaa2_switch_seed_bp(struct ethsw_core *ethsw) { - int *count, i; + int *count, ret, i; for (i = 0; i < DPAA2_ETHSW_NUM_BUFS; i += BUFS_PER_CMD) { + ret = dpaa2_switch_add_bufs(ethsw, ethsw->bpid); count = ðsw->buf_count; - *count += dpaa2_switch_add_bufs(ethsw, ethsw->bpid); + *count += ret; - if (unlikely(*count < BUFS_PER_CMD)) + if (unlikely(ret < BUFS_PER_CMD)) return -ENOMEM; } diff --git a/drivers/net/ethernet/freescale/fec_ptp.c b/drivers/net/ethernet/freescale/fec_ptp.c index e32f6724f5681627dde69b6502583f8aeb02ca3a..2e4f3e1782a2520c45912b414fd340db298b1fe7 100644 --- a/drivers/net/ethernet/freescale/fec_ptp.c +++ b/drivers/net/ethernet/freescale/fec_ptp.c @@ -775,6 +775,9 @@ void fec_ptp_stop(struct platform_device *pdev) struct net_device *ndev = platform_get_drvdata(pdev); struct fec_enet_private *fep = netdev_priv(ndev); + if (fep->pps_enable) + fec_ptp_enable_pps(fep, 0); + cancel_delayed_work_sync(&fep->time_keep); hrtimer_cancel(&fep->perout_timer); if (fep->ptp_clock) diff --git a/drivers/net/ethernet/google/gve/gve_ethtool.c b/drivers/net/ethernet/google/gve/gve_ethtool.c index 3480ff5c7ed6a7b8828c559e055a6154dd15cc7e..5a8b490ab3ad0861955dd4a440022104dfd18b62 100644 --- a/drivers/net/ethernet/google/gve/gve_ethtool.c +++ b/drivers/net/ethernet/google/gve/gve_ethtool.c @@ -495,7 +495,7 @@ static int gve_set_channels(struct net_device *netdev, return -EINVAL; } - if (!netif_carrier_ok(netdev)) { + if (!netif_running(netdev)) { priv->tx_cfg.num_queues = new_tx; priv->rx_cfg.num_queues = new_rx; return 0; diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index 9744b426940e46c75da826c97b2afa44fe2b1ee0..661566db68c86018c0b46b4427d41e819db09b4f 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -1566,7 +1566,7 @@ static int gve_set_xdp(struct gve_priv *priv, struct bpf_prog *prog, u32 status; old_prog = READ_ONCE(priv->xdp_prog); - if (!netif_carrier_ok(priv->dev)) { + if (!netif_running(priv->dev)) { WRITE_ONCE(priv->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); @@ -1847,7 +1847,7 @@ int gve_adjust_queues(struct gve_priv *priv, rx_alloc_cfg.qcfg = &new_rx_config; tx_alloc_cfg.num_rings = new_tx_config.num_queues; - if (netif_carrier_ok(priv->dev)) { + if (netif_running(priv->dev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); return err; } @@ -2064,7 +2064,7 @@ static int gve_set_features(struct net_device *netdev, if ((netdev->features & NETIF_F_LRO) != (features & NETIF_F_LRO)) { netdev->features ^= NETIF_F_LRO; - if (netif_carrier_ok(netdev)) { + if (netif_running(netdev)) { err = gve_adjust_config(priv, &tx_alloc_cfg, &rx_alloc_cfg); if (err) goto revert_features; @@ -2359,7 +2359,7 @@ err: int gve_reset(struct gve_priv *priv, bool attempt_teardown) { - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); int err; dev_info(&priv->pdev->dev, "Performing reset\n"); @@ -2700,7 +2700,7 @@ static void gve_shutdown(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); rtnl_lock(); if (was_up && gve_close(priv->dev)) { @@ -2718,7 +2718,7 @@ static int gve_suspend(struct pci_dev *pdev, pm_message_t state) { struct net_device *netdev = pci_get_drvdata(pdev); struct gve_priv *priv = netdev_priv(netdev); - bool was_up = netif_carrier_ok(priv->dev); + bool was_up = netif_running(priv->dev); priv->suspend_cnt++; rtnl_lock(); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c index a5fc0209d628eb921e6cbe0504309529c50d58a3..4cbc4d069a1f369eaec2fec6b3412a0fce186782 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c @@ -5724,6 +5724,9 @@ static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle) struct net_device *netdev = handle->kinfo.netdev; struct hns3_nic_priv *priv = netdev_priv(netdev); + if (!test_bit(HNS3_NIC_STATE_DOWN, &priv->state)) + hns3_nic_net_stop(netdev); + if (!test_and_clear_bit(HNS3_NIC_STATE_INITED, &priv->state)) { netdev_warn(netdev, "already uninitialized\n"); return 0; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c index e132c2f09560984139c8089635272099cfb290d9..cc7f46c0b35ff64582098a2ef74e227b18a5c36b 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.c @@ -1598,8 +1598,7 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) { u32 loop_para[HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE] = {0}; struct hclge_mod_reg_common_msg msg; - u8 i, j, num; - u32 loop_time; + u8 i, j, num, loop_time; num = ARRAY_SIZE(hclge_ssu_reg_common_msg); for (i = 0; i < num; i++) { @@ -1609,7 +1608,8 @@ static void hclge_query_reg_info_of_ssu(struct hclge_dev *hdev) loop_time = 1; loop_para[0] = 0; if (msg.need_para) { - loop_time = hdev->ae_dev->dev_specs.tnl_num; + loop_time = min(hdev->ae_dev->dev_specs.tnl_num, + HCLGE_MOD_MSG_PARA_ARRAY_MAX_SIZE); for (j = 0; j < loop_time; j++) loop_para[j] = j + 1; } diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index 82574ce0194fbaceab0b78b4159ff68b6cc4ae29..6c33195a1168f8b6af468b0e05d36cc5a9b3dde9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2653,8 +2653,17 @@ static int hclge_cfg_mac_speed_dup_h(struct hnae3_handle *handle, int speed, { struct hclge_vport *vport = hclge_get_vport(handle); struct hclge_dev *hdev = vport->back; + int ret; + + ret = hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + + if (ret) + return ret; - return hclge_cfg_mac_speed_dup(hdev, speed, duplex, lane_num); + hdev->hw.mac.req_speed = speed; + hdev->hw.mac.req_duplex = duplex; + + return 0; } static int hclge_set_autoneg_en(struct hclge_dev *hdev, bool enable) @@ -2956,17 +2965,20 @@ static int hclge_mac_init(struct hclge_dev *hdev) if (!test_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) hdev->hw.mac.duplex = HCLGE_MAC_FULL; - ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.speed, - hdev->hw.mac.duplex, hdev->hw.mac.lane_num); - if (ret) - return ret; - if (hdev->hw.mac.support_autoneg) { ret = hclge_set_autoneg_en(hdev, hdev->hw.mac.autoneg); if (ret) return ret; } + if (!hdev->hw.mac.autoneg) { + ret = hclge_cfg_mac_speed_dup_hw(hdev, hdev->hw.mac.req_speed, + hdev->hw.mac.req_duplex, + hdev->hw.mac.lane_num); + if (ret) + return ret; + } + mac->link = 0; if (mac->user_fec_mode & BIT(HNAE3_FEC_USER_DEF)) { @@ -11444,7 +11456,7 @@ static void hclge_pci_uninit(struct hclge_dev *hdev) pcim_iounmap(pdev, hdev->hw.hw.io_base); pci_free_irq_vectors(pdev); - pci_release_mem_regions(pdev); + pci_release_regions(pdev); pci_disable_device(pdev); } @@ -11516,8 +11528,8 @@ static void hclge_reset_done(struct hnae3_ae_dev *ae_dev) dev_err(&hdev->pdev->dev, "fail to rebuild, ret=%d\n", ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGE_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static void hclge_clear_resetting_state(struct hclge_dev *hdev) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c index 85fb11de43a12457879f95c27e7ca4b14355c234..80079657afebe0add970be55425deaa87b46e3b4 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c @@ -191,6 +191,9 @@ static void hclge_mac_adjust_link(struct net_device *netdev) if (ret) netdev_err(netdev, "failed to adjust link.\n"); + hdev->hw.mac.req_speed = (u32)speed; + hdev->hw.mac.req_duplex = (u8)duplex; + ret = hclge_cfg_flowctrl(hdev); if (ret) netdev_err(netdev, "failed to configure flow control.\n"); diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c index 3735d2fed11f75ea44070700db9c15f928489a24..094a7c7b55921f9a0ddb3637ebe8d5ca9eb46c38 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c @@ -1747,8 +1747,8 @@ static void hclgevf_reset_done(struct hnae3_ae_dev *ae_dev) ret); hdev->reset_type = HNAE3_NONE_RESET; - clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state); - up(&hdev->reset_sem); + if (test_and_clear_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state)) + up(&hdev->reset_sem); } static u32 hclgevf_get_fw_version(struct hnae3_handle *handle) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c index 00fed5a61d62a98ace67b62aa3806d7f0356b00d..62ef8e2fb5f1ba378472f3958b56d9c386976545 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c @@ -337,7 +337,7 @@ int ice_devlink_create_pf_port(struct ice_pf *pf) return -EIO; attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pf->hw.bus.func; + attrs.phys.port_number = pf->hw.pf_id; /* As FW supports only port split options for whole device, * set port split options only for first PF. @@ -455,7 +455,7 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) return -EINVAL; attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; - attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.pf = pf->hw.pf_id; attrs.pci_vf.vf = vf->vf_id; ice_devlink_set_switch_id(pf, &attrs.switch_id); diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 99a75a59078ef3e6c71c46696b694d00fd082ea1..caaa10157909ee8bcd01f525ace63b55184b5090 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -765,18 +765,17 @@ static inline struct xsk_buff_pool *ice_get_xp_from_qid(struct ice_vsi *vsi, } /** - * ice_xsk_pool - get XSK buffer pool bound to a ring + * ice_rx_xsk_pool - assign XSK buff pool to Rx ring * @ring: Rx ring to use * - * Returns a pointer to xsk_buff_pool structure if there is a buffer pool - * present, NULL otherwise. + * Sets XSK buff pool pointer on Rx ring. */ -static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) +static inline void ice_rx_xsk_pool(struct ice_rx_ring *ring) { struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - return ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** @@ -801,7 +800,7 @@ static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) if (!ring) return; - ring->xsk_pool = ice_get_xp_from_qid(vsi, qid); + WRITE_ONCE(ring->xsk_pool, ice_get_xp_from_qid(vsi, qid)); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index 5d396c1a7731482f725561a8eff709ecd3cc793e..f448d3a84564259c44348266568cb7b8bd39aaca 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -512,6 +512,25 @@ static void ice_xsk_pool_fill_cb(struct ice_rx_ring *ring) xsk_pool_fill_cb(ring->xsk_pool, &desc); } +/** + * ice_get_frame_sz - calculate xdp_buff::frame_sz + * @rx_ring: the ring being configured + * + * Return frame size based on underlying PAGE_SIZE + */ +static unsigned int ice_get_frame_sz(struct ice_rx_ring *rx_ring) +{ + unsigned int frame_sz; + +#if (PAGE_SIZE >= 8192) + frame_sz = rx_ring->rx_buf_len; +#else + frame_sz = ice_rx_pg_size(rx_ring) / 2; +#endif + + return frame_sz; +} + /** * ice_vsi_cfg_rxq - Configure an Rx queue * @ring: the ring being configured @@ -536,7 +555,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return err; } - ring->xsk_pool = ice_xsk_pool(ring); + ice_rx_xsk_pool(ring); if (ring->xsk_pool) { xdp_rxq_info_unreg(&ring->xdp_rxq); @@ -576,7 +595,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) } } - xdp_init_buff(&ring->xdp, ice_rx_pg_size(ring) / 2, &ring->xdp_rxq); + xdp_init_buff(&ring->xdp, ice_get_frame_sz(ring), &ring->xdp_rxq); ring->xdp.data = NULL; ring->xdp_ext.pkt_ctx = &ring->pkt_ctx; err = ice_setup_rx_ctx(ring); @@ -597,7 +616,7 @@ static int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) return 0; } - ok = ice_alloc_rx_bufs_zc(ring, num_bufs); + ok = ice_alloc_rx_bufs_zc(ring, ring->xsk_pool, num_bufs); if (!ok) { u16 pf_q = ring->vsi->rxq_map[ring->q_index]; diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8c990c9761329954e01bce0f7d389771f9c66b9d..bc79ba974e4958e171b5f708578fff5e1911a37c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -4673,10 +4673,10 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, if (err) return err; - fec_stats->uncorrectable_blocks.total = (fec_corr_high_val << 16) + - fec_corr_low_val; - fec_stats->corrected_blocks.total = (fec_uncorr_high_val << 16) + - fec_uncorr_low_val; + fec_stats->corrected_blocks.total = (fec_corr_high_val << 16) + + fec_corr_low_val; + fec_stats->uncorrectable_blocks.total = (fec_uncorr_high_val << 16) + + fec_uncorr_low_val; return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index ec636be4d17d1e9b896380ba0223e4b12298b789..6f97ed471fe9a190cf82f10dfc0ed0f1b3942f3b 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -559,6 +559,8 @@ ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; + synchronize_irq(pf->oicr_irq.virq); + ice_unplug_aux_dev(pf); /* Notify VFs of impending reset */ @@ -2948,7 +2950,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) ice_for_each_rxq(vsi, i) { struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; - if (rx_ring->xsk_pool) + if (READ_ONCE(rx_ring->xsk_pool)) napi_schedule(&rx_ring->q_vector->napi); } } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e2786cc1328611a4b2dae3b5ba88211d2e7e57bc..ef2e858f49bb0ed12302454177a2640b07ec7161 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1477,6 +1477,10 @@ void ice_ptp_link_change(struct ice_pf *pf, u8 port, bool linkup) /* Update cached link status for this port immediately */ ptp_port->link_up = linkup; + /* Skip HW writes if reset is in progress */ + if (pf->hw.reset_ongoing) + return; + switch (hw->ptp.phy_model) { case ICE_PHY_E810: /* Do not reconfigure E810 PHY */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 8bb743f78fcb4771d852293dfcbbd3b28dbf3726..c9bc3f1add5d37839e00dea9f9fecc04e737b327 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -456,7 +456,7 @@ void ice_free_rx_ring(struct ice_rx_ring *rx_ring) if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - rx_ring->xdp_prog = NULL; + WRITE_ONCE(rx_ring->xdp_prog, NULL); if (rx_ring->xsk_pool) { kfree(rx_ring->xdp_buf); rx_ring->xdp_buf = NULL; @@ -521,30 +521,6 @@ err: return -ENOMEM; } -/** - * ice_rx_frame_truesize - * @rx_ring: ptr to Rx ring - * @size: size - * - * calculate the truesize with taking into the account PAGE_SIZE of - * underlying arch - */ -static unsigned int -ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) -{ - unsigned int truesize; - -#if (PAGE_SIZE < 8192) - truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = rx_ring->rx_offset ? - SKB_DATA_ALIGN(rx_ring->rx_offset + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; -} - /** * ice_run_xdp - Executes an XDP program on initialized xdp_buff * @rx_ring: Rx ring @@ -837,16 +813,15 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) if (!dev_page_is_reusable(page)) return false; -#if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ if (unlikely(rx_buf->pgcnt - pagecnt_bias > 1)) return false; -#else +#if (PAGE_SIZE >= 8192) #define ICE_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_2048) + (SKB_WITH_OVERHEAD(PAGE_SIZE) - ICE_RXBUF_3072) if (rx_buf->page_offset > ICE_LAST_OFFSET) return false; -#endif /* PAGE_SIZE < 8192) */ +#endif /* PAGE_SIZE >= 8192) */ /* If we have drained the page fragment pool we need to update * the pagecnt_bias and page count so that we fully restock the @@ -949,12 +924,7 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = -#if (PAGE_SIZE < 8192) - page_count(rx_buf->page); -#else - 0; -#endif + rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -1160,11 +1130,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) bool failure; u32 first; - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, 0); -#endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { xdp_ring = rx_ring->xdp_ring; @@ -1223,10 +1188,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) hard_start = page_address(rx_buf->page) + rx_buf->page_offset - offset; xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp->frame_sz = ice_rx_frame_truesize(rx_ring, size); -#endif xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { break; @@ -1521,10 +1482,11 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * budget and be more aggressive about cleaning up the Tx descriptors. */ ice_for_each_tx_ring(tx_ring, q_vector->tx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(tx_ring->xsk_pool); bool wd; - if (tx_ring->xsk_pool) - wd = ice_xmit_zc(tx_ring); + if (xsk_pool) + wd = ice_xmit_zc(tx_ring, xsk_pool); else if (ice_ring_is_xdp(tx_ring)) wd = true; else @@ -1550,6 +1512,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) budget_per_ring = budget; ice_for_each_rx_ring(rx_ring, q_vector->rx) { + struct xsk_buff_pool *xsk_pool = READ_ONCE(rx_ring->xsk_pool); int cleaned; /* A dedicated path for zero-copy allows making a single @@ -1557,7 +1520,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * ice_clean_rx_irq function and makes the codebase cleaner. */ cleaned = rx_ring->xsk_pool ? - ice_clean_rx_irq_zc(rx_ring, budget_per_ring) : + ice_clean_rx_irq_zc(rx_ring, xsk_pool, budget_per_ring) : ice_clean_rx_irq(rx_ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a65955eb23c0bd85adc7f0f8f1a4b39867da8907..240a7bec242be349353be039326f9fe098f5bdbe 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -52,10 +52,8 @@ static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) { ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (ice_is_xdp_ena_vsi(vsi)) { - synchronize_rcu(); + if (ice_is_xdp_ena_vsi(vsi)) ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - } ice_clean_rx_ring(vsi->rx_rings[q_idx]); } @@ -112,25 +110,29 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, * ice_qvec_cfg_msix - Enable IRQ for given queue vector * @vsi: the VSI that contains queue vector * @q_vector: queue vector + * @qid: queue index */ static void -ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid) { u16 reg_idx = q_vector->reg_idx; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; + int q, _qid = qid; ice_cfg_itr(hw, q_vector); - ice_for_each_tx_ring(tx_ring, q_vector->tx) - ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx, - q_vector->tx.itr_idx); + for (q = 0; q < q_vector->num_ring_tx; q++) { + ice_cfg_txq_interrupt(vsi, _qid, reg_idx, q_vector->tx.itr_idx); + _qid++; + } + + _qid = qid; - ice_for_each_rx_ring(rx_ring, q_vector->rx) - ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx, - q_vector->rx.itr_idx); + for (q = 0; q < q_vector->num_ring_rx; q++) { + ice_cfg_rxq_interrupt(vsi, _qid, reg_idx, q_vector->rx.itr_idx); + _qid++; + } ice_flush(hw); } @@ -164,6 +166,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) struct ice_tx_ring *tx_ring; struct ice_rx_ring *rx_ring; int timeout = 50; + int fail = 0; int err; if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) @@ -180,15 +183,17 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) usleep_range(1000, 2000); } + synchronize_net(); + netif_carrier_off(vsi->netdev); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + ice_qvec_dis_irq(vsi, rx_ring, q_vector); ice_qvec_toggle_napi(vsi, q_vector, false); - netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; @@ -196,17 +201,15 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, &txq_meta); - if (err) - return err; + if (!fail) + fail = err; } - err = ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, true); - if (err) - return err; + ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); ice_qp_clean_rings(vsi, q_idx); ice_qp_reset_stats(vsi, q_idx); - return 0; + return fail; } /** @@ -219,40 +222,48 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) { struct ice_q_vector *q_vector; + int fail = 0; + bool link_up; int err; err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; if (ice_is_xdp_ena_vsi(vsi)) { struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx); - if (err) - return err; + if (!fail) + fail = err; ice_set_ring_xdp(xdp_ring); ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_single_rxq(vsi, q_idx); - if (err) - return err; + if (!fail) + fail = err; q_vector = vsi->rx_rings[q_idx]->q_vector; - ice_qvec_cfg_msix(vsi, q_vector); + ice_qvec_cfg_msix(vsi, q_vector, q_idx); err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true); - if (err) - return err; + if (!fail) + fail = err; ice_qvec_toggle_napi(vsi, q_vector, true); ice_qvec_ena_irq(vsi, q_vector); - netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + /* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */ + synchronize_net(); + ice_get_link_status(vsi->port_info, &link_up); + if (link_up) { + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + netif_carrier_on(vsi->netdev); + } clear_bit(ICE_CFG_BUSY, vsi->state); - return 0; + return fail; } /** @@ -459,6 +470,7 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, /** * __ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Place the @count of descriptors onto Rx ring. Handle the ring wrap @@ -467,7 +479,8 @@ static u16 ice_fill_rx_descs(struct xsk_buff_pool *pool, struct xdp_buff **xdp, * * Returns true if all allocations were successful, false if any fail. */ -static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u32 nb_buffs_extra = 0, nb_buffs = 0; union ice_32b_rx_flex_desc *rx_desc; @@ -479,8 +492,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) xdp = ice_xdp_buf(rx_ring, ntu); if (ntu + count >= rx_ring->count) { - nb_buffs_extra = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, - rx_desc, + nb_buffs_extra = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, rx_ring->count - ntu); if (nb_buffs_extra != rx_ring->count - ntu) { ntu += nb_buffs_extra; @@ -493,7 +505,7 @@ static bool __ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) ice_release_rx_desc(rx_ring, 0); } - nb_buffs = ice_fill_rx_descs(rx_ring->xsk_pool, xdp, rx_desc, count); + nb_buffs = ice_fill_rx_descs(xsk_pool, xdp, rx_desc, count); ntu += nb_buffs; if (ntu == rx_ring->count) @@ -509,6 +521,7 @@ exit: /** * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers * @rx_ring: Rx ring + * @xsk_pool: XSK buffer pool to pick buffers to be filled by HW * @count: The number of buffers to allocate * * Wrapper for internal allocation routine; figure out how many tail @@ -516,7 +529,8 @@ exit: * * Returns true if all calls to internal alloc routine succeeded */ -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count) { u16 rx_thresh = ICE_RING_QUARTER(rx_ring); u16 leftover, i, tail_bumps; @@ -525,9 +539,9 @@ bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) leftover = count - (tail_bumps * rx_thresh); for (i = 0; i < tail_bumps; i++) - if (!__ice_alloc_rx_bufs_zc(rx_ring, rx_thresh)) + if (!__ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, rx_thresh)) return false; - return __ice_alloc_rx_bufs_zc(rx_ring, leftover); + return __ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, leftover); } /** @@ -596,8 +610,10 @@ out: /** * ice_clean_xdp_irq_zc - produce AF_XDP descriptors to CQ * @xdp_ring: XDP Tx ring + * @xsk_pool: AF_XDP buffer pool pointer */ -static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring) +static u32 ice_clean_xdp_irq_zc(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { u16 ntc = xdp_ring->next_to_clean; struct ice_tx_desc *tx_desc; @@ -648,7 +664,7 @@ skip: if (xdp_ring->next_to_clean >= cnt) xdp_ring->next_to_clean -= cnt; if (xsk_frames) - xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); + xsk_tx_completed(xsk_pool, xsk_frames); return completed_frames; } @@ -657,6 +673,7 @@ skip: * ice_xmit_xdp_tx_zc - AF_XDP ZC handler for XDP_TX * @xdp: XDP buffer to xmit * @xdp_ring: XDP ring to produce descriptor onto + * @xsk_pool: AF_XDP buffer pool pointer * * note that this function works directly on xdp_buff, no need to convert * it to xdp_frame. xdp_buff pointer is stored to ice_tx_buf so that cleaning @@ -666,7 +683,8 @@ skip: * was not enough space on XDP ring */ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, - struct ice_tx_ring *xdp_ring) + struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { struct skb_shared_info *sinfo = NULL; u32 size = xdp->data_end - xdp->data; @@ -680,7 +698,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, free_space = ICE_DESC_UNUSED(xdp_ring); if (free_space < ICE_RING_QUARTER(xdp_ring)) - free_space += ice_clean_xdp_irq_zc(xdp_ring); + free_space += ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); if (unlikely(!free_space)) goto busy; @@ -700,7 +718,7 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, dma_addr_t dma; dma = xsk_buff_xdp_get_dma(xdp); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, size); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, size); tx_buf->xdp = xdp; tx_buf->type = ICE_TX_BUF_XSK_TX; @@ -742,12 +760,14 @@ busy: * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action + * @xsk_pool: AF_XDP buffer pool pointer * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool) { int err, result = ICE_XDP_PASS; u32 act; @@ -758,7 +778,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (!err) return ICE_XDP_REDIR; - if (xsk_uses_need_wakeup(rx_ring->xsk_pool) && err == -ENOBUFS) + if (xsk_uses_need_wakeup(xsk_pool) && err == -ENOBUFS) result = ICE_XDP_EXIT; else result = ICE_XDP_CONSUMED; @@ -769,7 +789,7 @@ ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: break; case XDP_TX: - result = ice_xmit_xdp_tx_zc(xdp, xdp_ring); + result = ice_xmit_xdp_tx_zc(xdp, xdp_ring, xsk_pool); if (result == ICE_XDP_CONSUMED) goto out_failure; break; @@ -821,14 +841,16 @@ ice_add_xsk_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *first, /** * ice_clean_rx_irq_zc - consumes packets from the hardware ring * @rx_ring: AF_XDP Rx ring + * @xsk_pool: AF_XDP buffer pool pointer * @budget: NAPI budget * * Returns number of processed packets on success, remaining budget on failure. */ -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; - struct xsk_buff_pool *xsk_pool = rx_ring->xsk_pool; u32 ntc = rx_ring->next_to_clean; u32 ntu = rx_ring->next_to_use; struct xdp_buff *first = NULL; @@ -891,7 +913,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, first, xdp_prog, xdp_ring, + xsk_pool); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { xdp_xmit |= xdp_res; } else if (xdp_res == ICE_XDP_EXIT) { @@ -940,7 +963,8 @@ construct_skb: rx_ring->next_to_clean = ntc; entries_to_alloc = ICE_RX_DESC_UNUSED(rx_ring); if (entries_to_alloc > ICE_RING_QUARTER(rx_ring)) - failure |= !ice_alloc_rx_bufs_zc(rx_ring, entries_to_alloc); + failure |= !ice_alloc_rx_bufs_zc(rx_ring, xsk_pool, + entries_to_alloc); ice_finalize_xdp_rx(xdp_ring, xdp_xmit, 0); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); @@ -963,17 +987,19 @@ construct_skb: /** * ice_xmit_pkt - produce a single HW Tx descriptor out of AF_XDP descriptor * @xdp_ring: XDP ring to produce the HW Tx descriptor on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @desc: AF_XDP descriptor to pull the DMA address and length from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, +static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, struct xdp_desc *desc, unsigned int *total_bytes) { struct ice_tx_desc *tx_desc; dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, desc->addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, desc->len); + dma = xsk_buff_raw_get_dma(xsk_pool, desc->addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, desc->len); tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -986,10 +1012,13 @@ static void ice_xmit_pkt(struct ice_tx_ring *xdp_ring, struct xdp_desc *desc, /** * ice_xmit_pkt_batch - produce a batch of HW Tx descriptors out of AF_XDP descriptors * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, +static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, unsigned int *total_bytes) { u16 ntu = xdp_ring->next_to_use; @@ -999,8 +1028,8 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de loop_unrolled_for(i = 0; i < PKTS_PER_BATCH; i++) { dma_addr_t dma; - dma = xsk_buff_raw_get_dma(xdp_ring->xsk_pool, descs[i].addr); - xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_pool, dma, descs[i].len); + dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr); + xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len); tx_desc = ICE_TX_DESC(xdp_ring, ntu++); tx_desc->buf_addr = cpu_to_le64(dma); @@ -1016,60 +1045,69 @@ static void ice_xmit_pkt_batch(struct ice_tx_ring *xdp_ring, struct xdp_desc *de /** * ice_fill_tx_hw_ring - produce the number of Tx descriptors onto ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: XSK buffer pool to pick buffers to be consumed by HW * @descs: AF_XDP descriptors to pull the DMA addresses and lengths from * @nb_pkts: count of packets to be send * @total_bytes: bytes accumulator that will be used for stats update */ -static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, struct xdp_desc *descs, - u32 nb_pkts, unsigned int *total_bytes) +static void ice_fill_tx_hw_ring(struct ice_tx_ring *xdp_ring, + struct xsk_buff_pool *xsk_pool, + struct xdp_desc *descs, u32 nb_pkts, + unsigned int *total_bytes) { u32 batched, leftover, i; batched = ALIGN_DOWN(nb_pkts, PKTS_PER_BATCH); leftover = nb_pkts & (PKTS_PER_BATCH - 1); for (i = 0; i < batched; i += PKTS_PER_BATCH) - ice_xmit_pkt_batch(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt_batch(xdp_ring, xsk_pool, &descs[i], total_bytes); for (; i < batched + leftover; i++) - ice_xmit_pkt(xdp_ring, &descs[i], total_bytes); + ice_xmit_pkt(xdp_ring, xsk_pool, &descs[i], total_bytes); } /** * ice_xmit_zc - take entries from XSK Tx ring and place them onto HW Tx ring * @xdp_ring: XDP ring to produce the HW Tx descriptors on + * @xsk_pool: AF_XDP buffer pool pointer * * Returns true if there is no more work that needs to be done, false otherwise */ -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring) +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool) { - struct xdp_desc *descs = xdp_ring->xsk_pool->tx_descs; + struct xdp_desc *descs = xsk_pool->tx_descs; u32 nb_pkts, nb_processed = 0; unsigned int total_bytes = 0; int budget; - ice_clean_xdp_irq_zc(xdp_ring); + ice_clean_xdp_irq_zc(xdp_ring, xsk_pool); + + if (!netif_carrier_ok(xdp_ring->vsi->netdev) || + !netif_running(xdp_ring->vsi->netdev)) + return true; budget = ICE_DESC_UNUSED(xdp_ring); budget = min_t(u16, budget, ICE_RING_QUARTER(xdp_ring)); - nb_pkts = xsk_tx_peek_release_desc_batch(xdp_ring->xsk_pool, budget); + nb_pkts = xsk_tx_peek_release_desc_batch(xsk_pool, budget); if (!nb_pkts) return true; if (xdp_ring->next_to_use + nb_pkts >= xdp_ring->count) { nb_processed = xdp_ring->count - xdp_ring->next_to_use; - ice_fill_tx_hw_ring(xdp_ring, descs, nb_processed, &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, descs, nb_processed, + &total_bytes); xdp_ring->next_to_use = 0; } - ice_fill_tx_hw_ring(xdp_ring, &descs[nb_processed], nb_pkts - nb_processed, - &total_bytes); + ice_fill_tx_hw_ring(xdp_ring, xsk_pool, &descs[nb_processed], + nb_pkts - nb_processed, &total_bytes); ice_set_rs_bit(xdp_ring); ice_xdp_ring_update_tail(xdp_ring); ice_update_tx_ring_stats(xdp_ring, nb_pkts, total_bytes); - if (xsk_uses_need_wakeup(xdp_ring->xsk_pool)) - xsk_set_tx_need_wakeup(xdp_ring->xsk_pool); + if (xsk_uses_need_wakeup(xsk_pool)) + xsk_set_tx_need_wakeup(xsk_pool); return nb_pkts < budget; } @@ -1091,7 +1129,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_vsi *vsi = np->vsi; struct ice_tx_ring *ring; - if (test_bit(ICE_VSI_DOWN, vsi->state)) + if (test_bit(ICE_VSI_DOWN, vsi->state) || !netif_carrier_ok(netdev)) return -ENETDOWN; if (!ice_is_xdp_ena_vsi(vsi)) @@ -1102,7 +1140,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, ring = vsi->rx_rings[queue_id]->xdp_ring; - if (!ring->xsk_pool) + if (!READ_ONCE(ring->xsk_pool)) return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 6fa181f080ef139f94ae1faee9c3442c0e0371b9..45adeb513253a0d12e11251c4070855451a7ed44 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -20,16 +20,20 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); -int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, + int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); -bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, + struct xsk_buff_pool *xsk_pool, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); -bool ice_xmit_zc(struct ice_tx_ring *xdp_ring); +bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); #else -static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring) +static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, + struct xsk_buff_pool __always_unused *xsk_pool) { return false; } @@ -44,6 +48,7 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, static inline int ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, int __always_unused budget) { return 0; @@ -51,6 +56,7 @@ ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, static inline bool ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, + struct xsk_buff_pool __always_unused *xsk_pool, u16 __always_unused count) { return false; diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 5dbf2b4ba1b001903d03e88c9b7a603795dac1a7..0b6c8fd5bc90f7ad56768d79fd0dc234ba0b4135 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -900,8 +900,8 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); - idpf_vport_intr_rel(vport); idpf_vport_queues_rel(vport); + idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; } @@ -1335,9 +1335,8 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up - * @alloc_res: allocate queue resources */ -static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) +static int idpf_vport_open(struct idpf_vport *vport) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1350,45 +1349,43 @@ static int idpf_vport_open(struct idpf_vport *vport, bool alloc_res) /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); - if (alloc_res) { - err = idpf_vport_queues_alloc(vport); - if (err) - return err; - } - err = idpf_vport_intr_alloc(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - goto queues_rel; + return err; } + err = idpf_vport_queues_alloc(vport); + if (err) + goto intr_rel; + err = idpf_vport_queue_ids_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue ids for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_vport_intr_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize interrupts for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_rx_bufs_init_all(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } err = idpf_queue_reg_init(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); - goto intr_rel; + goto queues_rel; } idpf_rx_init_buf_tail(vport); @@ -1455,10 +1452,10 @@ unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); intr_deinit: idpf_vport_intr_deinit(vport); -intr_rel: - idpf_vport_intr_rel(vport); queues_rel: idpf_vport_queues_rel(vport); +intr_rel: + idpf_vport_intr_rel(vport); return err; } @@ -1539,7 +1536,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport, true); + idpf_vport_open(vport); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1898,9 +1895,6 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto free_vport; } - err = idpf_vport_queues_alloc(new_vport); - if (err) - goto free_vport; if (current_state <= __IDPF_VPORT_DOWN) { idpf_send_delete_queues_msg(vport); } else { @@ -1932,17 +1926,23 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, err = idpf_set_real_num_queues(vport); if (err) - goto err_reset; + goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport, false); + err = idpf_vport_open(vport); kfree(new_vport); return err; err_reset: - idpf_vport_queues_rel(new_vport); + idpf_send_add_queues_msg(vport, vport->num_txq, vport->num_complq, + vport->num_rxq, vport->num_bufq); + +err_open: + if (current_state == __IDPF_VPORT_UP) + idpf_vport_open(vport); + free_vport: kfree(new_vport); @@ -2171,7 +2171,7 @@ static int idpf_open(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - err = idpf_vport_open(vport, true); + err = idpf_vport_open(vport); idpf_vport_ctrl_unlock(netdev); diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index af2879f03b8df72d94e7538243210a6e60d11656..585c3dadd9bface46192c2e73dc222442b1e09ef 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -3576,9 +3576,7 @@ static void idpf_vport_intr_napi_dis_all(struct idpf_vport *vport) */ void idpf_vport_intr_rel(struct idpf_vport *vport) { - int i, j, v_idx; - - for (v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { + for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; kfree(q_vector->complq); @@ -3593,26 +3591,6 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) free_cpumask_var(q_vector->affinity_mask); } - /* Clean up the mapping of queues to vectors */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - - if (idpf_is_queue_model_split(vport->rxq_model)) - for (j = 0; j < rx_qgrp->splitq.num_rxq_sets; j++) - rx_qgrp->splitq.rxq_sets[j]->rxq.q_vector = NULL; - else - for (j = 0; j < rx_qgrp->singleq.num_rxq; j++) - rx_qgrp->singleq.rxqs[j]->q_vector = NULL; - } - - if (idpf_is_queue_model_split(vport->txq_model)) - for (i = 0; i < vport->num_txq_grp; i++) - vport->txq_grps[i].complq->q_vector = NULL; - else - for (i = 0; i < vport->num_txq_grp; i++) - for (j = 0; j < vport->txq_grps[i].num_txq; j++) - vport->txq_grps[i].txqs[j]->q_vector = NULL; - kfree(vport->q_vectors); vport->q_vectors = NULL; } @@ -3780,13 +3758,15 @@ void idpf_vport_intr_update_itr_ena_irq(struct idpf_q_vector *q_vector) /** * idpf_vport_intr_req_irq - get MSI-X vectors from the OS for the vport * @vport: main vport structure - * @basename: name for the vector */ -static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) +static int idpf_vport_intr_req_irq(struct idpf_vport *vport) { struct idpf_adapter *adapter = vport->adapter; + const char *drv_name, *if_name, *vec_name; int vector, err, irq_num, vidx; - const char *vec_name; + + drv_name = dev_driver_string(&adapter->pdev->dev); + if_name = netdev_name(vport->netdev); for (vector = 0; vector < vport->num_q_vectors; vector++) { struct idpf_q_vector *q_vector = &vport->q_vectors[vector]; @@ -3804,8 +3784,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport, char *basename) else continue; - name = kasprintf(GFP_KERNEL, "%s-%s-%d", basename, vec_name, - vidx); + name = kasprintf(GFP_KERNEL, "%s-%s-%s-%d", drv_name, if_name, + vec_name, vidx); err = request_irq(irq_num, idpf_vport_intr_clean_queues, 0, name, q_vector); @@ -4326,7 +4306,6 @@ error: */ int idpf_vport_intr_init(struct idpf_vport *vport) { - char *int_name; int err; err = idpf_vport_intr_init_vec_idx(vport); @@ -4340,11 +4319,7 @@ int idpf_vport_intr_init(struct idpf_vport *vport) if (err) goto unroll_vectors_alloc; - int_name = kasprintf(GFP_KERNEL, "%s-%s", - dev_driver_string(&vport->adapter->pdev->dev), - vport->netdev->name); - - err = idpf_vport_intr_req_irq(vport, int_name); + err = idpf_vport_intr_req_irq(vport); if (err) goto unroll_vectors_alloc; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 11be39f435f38daeb42aa4fbf3a970e067753285..33a42b4c21e0b622aedee563aca67c3ae34063ac 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -4808,6 +4808,7 @@ static void igb_set_rx_buffer_len(struct igb_adapter *adapter, #if (PAGE_SIZE < 8192) if (adapter->max_frame_size > IGB_MAX_FRAME_BUILD_SKB || + IGB_2K_TOO_SMALL_WITH_PADDING || rd32(E1000_RCTL) & E1000_RCTL_SBP) set_ring_uses_large_buffer(rx_ring); #endif diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index 5f92b3c7c3d4a1396437337783647cfc7c654320..511384f3ec5cb5b91a0584b3aaf9370b3d4a48ac 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -404,6 +404,12 @@ #define IGC_DTXMXPKTSZ_TSN 0x19 /* 1600 bytes of max TX DMA packet size */ #define IGC_DTXMXPKTSZ_DEFAULT 0x98 /* 9728-byte Jumbo frames */ +/* Retry Buffer Control */ +#define IGC_RETX_CTL 0x041C +#define IGC_RETX_CTL_WATERMARK_MASK 0xF +#define IGC_RETX_CTL_QBVFULLTH_SHIFT 8 /* QBV Retry Buffer Full Threshold */ +#define IGC_RETX_CTL_QBVFULLEN 0x1000 /* Enable QBV Retry Buffer Full Threshold */ + /* Transmit Scheduling Latency */ /* Latency between transmission scheduling (LaunchTime) and the time * the packet is transmitted to the network in nanosecond. diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index cb5c7b09e8a009c49e6777424ae434e59b6e26c9..dfd6c00b4205d4ab251bbd994ceccd4ae1cd0f69 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -6306,21 +6306,6 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, size_t n; int i; - switch (qopt->cmd) { - case TAPRIO_CMD_REPLACE: - break; - case TAPRIO_CMD_DESTROY: - return igc_tsn_clear_schedule(adapter); - case TAPRIO_CMD_STATS: - igc_taprio_stats(adapter->netdev, &qopt->stats); - return 0; - case TAPRIO_CMD_QUEUE_STATS: - igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); - return 0; - default: - return -EOPNOTSUPP; - } - if (qopt->base_time < 0) return -ERANGE; @@ -6330,12 +6315,16 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter, if (!validate_schedule(adapter, qopt)) return -EINVAL; + igc_ptp_read(adapter, &now); + + if (igc_tsn_is_taprio_activated_by_user(adapter) && + is_base_time_past(qopt->base_time, &now)) + adapter->qbv_config_change_errors++; + adapter->cycle_time = qopt->cycle_time; adapter->base_time = qopt->base_time; adapter->taprio_offload_enable = true; - igc_ptp_read(adapter, &now); - for (n = 0; n < qopt->num_entries; n++) { struct tc_taprio_sched_entry *e = &qopt->entries[n]; @@ -6429,7 +6418,23 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter, if (hw->mac.type != igc_i225) return -EOPNOTSUPP; - err = igc_save_qbv_schedule(adapter, qopt); + switch (qopt->cmd) { + case TAPRIO_CMD_REPLACE: + err = igc_save_qbv_schedule(adapter, qopt); + break; + case TAPRIO_CMD_DESTROY: + err = igc_tsn_clear_schedule(adapter); + break; + case TAPRIO_CMD_STATS: + igc_taprio_stats(adapter->netdev, &qopt->stats); + return 0; + case TAPRIO_CMD_QUEUE_STATS: + igc_taprio_queue_stats(adapter->netdev, &qopt->queue_stats); + return 0; + default: + return -EOPNOTSUPP; + } + if (err) return err; diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c index 22cefb1eeedfa71651442d9da319778fa3b2101c..d68fa7f3d5f0780655e18348002ca35c0272628b 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.c +++ b/drivers/net/ethernet/intel/igc/igc_tsn.c @@ -49,12 +49,19 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter) return new_flags; } +static bool igc_tsn_is_tx_mode_in_tsn(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return !!(rd32(IGC_TQAVCTRL) & IGC_TQAVCTRL_TRANSMIT_MODE_TSN); +} + void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; u16 txoffset; - if (!is_any_launchtime(adapter)) + if (!igc_tsn_is_tx_mode_in_tsn(adapter)) return; switch (adapter->link_speed) { @@ -78,6 +85,23 @@ void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter) wr32(IGC_GTXOFFSET, txoffset); } +static void igc_tsn_restore_retx_default(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl; + + retxctl = rd32(IGC_RETX_CTL) & IGC_RETX_CTL_WATERMARK_MASK; + wr32(IGC_RETX_CTL, retxctl); +} + +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + + return (rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && + adapter->taprio_offload_enable; +} + /* Returns the TSN specific registers to their default values after * the adapter is reset. */ @@ -91,6 +115,9 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT); wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT); + if (igc_is_device_id_i226(hw)) + igc_tsn_restore_retx_default(adapter); + tqavctrl = rd32(IGC_TQAVCTRL); tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV | IGC_TQAVCTRL_FUTSCDDIS); @@ -111,6 +138,25 @@ static int igc_tsn_disable_offload(struct igc_adapter *adapter) return 0; } +/* To partially fix i226 HW errata, reduce MAC internal buffering from 192 Bytes + * to 88 Bytes by setting RETX_CTL register using the recommendation from: + * a) Ethernet Controller I225/I226 Specification Update Rev 2.1 + * Item 9: TSN: Packet Transmission Might Cross the Qbv Window + * b) I225/6 SW User Manual Rev 1.2.4: Section 8.11.5 Retry Buffer Control + */ +static void igc_tsn_set_retx_qbvfullthreshold(struct igc_adapter *adapter) +{ + struct igc_hw *hw = &adapter->hw; + u32 retxctl, watermark; + + retxctl = rd32(IGC_RETX_CTL); + watermark = retxctl & IGC_RETX_CTL_WATERMARK_MASK; + /* Set QBVFULLTH value using watermark and set QBVFULLEN */ + retxctl |= (watermark << IGC_RETX_CTL_QBVFULLTH_SHIFT) | + IGC_RETX_CTL_QBVFULLEN; + wr32(IGC_RETX_CTL, retxctl); +} + static int igc_tsn_enable_offload(struct igc_adapter *adapter) { struct igc_hw *hw = &adapter->hw; @@ -123,6 +169,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter) wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN); wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN); + if (igc_is_device_id_i226(hw)) + igc_tsn_set_retx_qbvfullthreshold(adapter); + for (i = 0; i < adapter->num_tx_queues; i++) { struct igc_ring *ring = adapter->tx_ring[i]; u32 txqctl = 0; @@ -262,14 +311,6 @@ skip_cbs: s64 n = div64_s64(ktime_sub_ns(systim, base_time), cycle); base_time = ktime_add_ns(base_time, (n + 1) * cycle); - - /* Increase the counter if scheduling into the past while - * Gate Control List (GCL) is running. - */ - if ((rd32(IGC_BASET_H) || rd32(IGC_BASET_L)) && - (adapter->tc_setup_type == TC_SETUP_QDISC_TAPRIO) && - (adapter->qbv_count > 1)) - adapter->qbv_config_change_errors++; } else { if (igc_is_device_id_i226(hw)) { ktime_t adjust_time, expires_time; @@ -331,15 +372,22 @@ int igc_tsn_reset(struct igc_adapter *adapter) return err; } -int igc_tsn_offload_apply(struct igc_adapter *adapter) +static bool igc_tsn_will_tx_mode_change(struct igc_adapter *adapter) { - struct igc_hw *hw = &adapter->hw; + bool any_tsn_enabled = !!(igc_tsn_new_flags(adapter) & + IGC_FLAG_TSN_ANY_ENABLED); - /* Per I225/6 HW Design Section 7.5.2.1, transmit mode - * cannot be changed dynamically. Require reset the adapter. + return (any_tsn_enabled && !igc_tsn_is_tx_mode_in_tsn(adapter)) || + (!any_tsn_enabled && igc_tsn_is_tx_mode_in_tsn(adapter)); +} + +int igc_tsn_offload_apply(struct igc_adapter *adapter) +{ + /* Per I225/6 HW Design Section 7.5.2.1 guideline, if tx mode change + * from legacy->tsn or tsn->legacy, then reset adapter is needed. */ if (netif_running(adapter->netdev) && - (igc_is_device_id_i225(hw) || !adapter->qbv_count)) { + igc_tsn_will_tx_mode_change(adapter)) { schedule_work(&adapter->reset_task); return 0; } diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h index b53e6af560b73857829d0361356f83aee9bbd57f..98ec845a86bf00935089bd7c5c7a3a7422facf05 100644 --- a/drivers/net/ethernet/intel/igc/igc_tsn.h +++ b/drivers/net/ethernet/intel/igc/igc_tsn.h @@ -7,5 +7,6 @@ int igc_tsn_offload_apply(struct igc_adapter *adapter); int igc_tsn_reset(struct igc_adapter *adapter); void igc_tsn_adjust_txtime_offset(struct igc_adapter *adapter); +bool igc_tsn_is_taprio_activated_by_user(struct igc_adapter *adapter); #endif /* _IGC_BASE_H */ diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index b06e245629739f6f16005a51e181ce057d3e8857..d8be0e4dcb072b8cf8e8d26a22173829de3d40c8 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -946,15 +946,13 @@ jme_udpsum(struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IP)) return csum; skb_set_network_header(skb, ETH_HLEN); - if ((ip_hdr(skb)->protocol != IPPROTO_UDP) || - (skb->len < (ETH_HLEN + - (ip_hdr(skb)->ihl << 2) + - sizeof(struct udphdr)))) { + + if (ip_hdr(skb)->protocol != IPPROTO_UDP || + skb->len < (ETH_HLEN + ip_hdrlen(skb) + sizeof(struct udphdr))) { skb_reset_network_header(skb); return csum; } - skb_set_transport_header(skb, - ETH_HLEN + (ip_hdr(skb)->ihl << 2)); + skb_set_transport_header(skb, ETH_HLEN + ip_hdrlen(skb)); csum = udp_hdr(skb)->check; skb_reset_transport_header(skb); skb_reset_network_header(skb); diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8c45ad983abc0c4b94a731a948dd6f56f48941ef..0d62a33afa80caf687e9175e4ed0d90ce054dc87 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -953,13 +953,13 @@ static void mvpp2_bm_pool_update_fc(struct mvpp2_port *port, static void mvpp2_bm_pool_update_priv_fc(struct mvpp2 *priv, bool en) { struct mvpp2_port *port; - int i; + int i, j; for (i = 0; i < priv->port_count; i++) { port = priv->port_list[i]; if (port->priv->percpu_pools) { - for (i = 0; i < port->nrxqs; i++) - mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[i], + for (j = 0; j < port->nrxqs; j++) + mvpp2_bm_pool_update_fc(port, &port->priv->bm_pools[j], port->tx_fc & en); } else { mvpp2_bm_pool_update_fc(port, port->pool_long, port->tx_fc & en); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c index 3e09d2285814709c0f4c3141f1b819580385abac..daf4b951e905910e534a832690a06ac85aca2908 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cpt.c @@ -632,7 +632,9 @@ int rvu_mbox_handler_cpt_inline_ipsec_cfg(struct rvu *rvu, return ret; } -static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) +static bool validate_and_update_reg_offset(struct rvu *rvu, + struct cpt_rd_wr_reg_msg *req, + u64 *reg_offset) { u64 offset = req->reg_offset; int blkaddr, num_lfs, lf; @@ -663,6 +665,11 @@ static bool is_valid_offset(struct rvu *rvu, struct cpt_rd_wr_reg_msg *req) if (lf < 0) return false; + /* Translate local LF's offset to global CPT LF's offset to + * access LFX register. + */ + *reg_offset = (req->reg_offset & 0xFF000) + (lf << 3); + return true; } else if (!(req->hdr.pcifunc & RVU_PFVF_FUNC_MASK)) { /* Registers that can be accessed from PF */ @@ -697,7 +704,7 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, struct cpt_rd_wr_reg_msg *rsp) { u64 offset = req->reg_offset; - int blkaddr, lf; + int blkaddr; blkaddr = validate_and_get_cpt_blkaddr(req->blkaddr); if (blkaddr < 0) @@ -708,18 +715,10 @@ int rvu_mbox_handler_cpt_rd_wr_register(struct rvu *rvu, !is_cpt_vf(rvu, req->hdr.pcifunc)) return CPT_AF_ERR_ACCESS_DENIED; - if (!is_valid_offset(rvu, req)) + if (!validate_and_update_reg_offset(rvu, req, &offset)) return CPT_AF_ERR_ACCESS_DENIED; - /* Translate local LF used by VFs to global CPT LF */ - lf = rvu_get_lf(rvu, &rvu->hw->block[blkaddr], req->hdr.pcifunc, - (offset & 0xFFF) >> 3); - - /* Translate local LF's offset to global CPT LF's offset */ - offset &= 0xFF000; - offset += lf << 3; - - rsp->reg_offset = offset; + rsp->reg_offset = req->reg_offset; rsp->ret_val = req->ret_val; rsp->is_write = req->is_write; diff --git a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 61334a71058c7594a61ca768ce041f92ab238d24..e212a4ba92751fc807e6aea5d9d187f008eb30e5 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -2666,14 +2666,15 @@ mtk_wed_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_pri { struct mtk_wed_flow_block_priv *priv = cb_priv; struct flow_cls_offload *cls = type_data; - struct mtk_wed_hw *hw = priv->hw; + struct mtk_wed_hw *hw = NULL; - if (!tc_can_offload(priv->dev)) + if (!priv || !tc_can_offload(priv->dev)) return -EOPNOTSUPP; if (type != TC_SETUP_CLSFLOWER) return -EOPNOTSUPP; + hw = priv->hw; return mtk_flow_offload_cmd(hw->eth, cls, hw->index); } @@ -2729,6 +2730,7 @@ mtk_wed_setup_tc_block(struct mtk_wed_hw *hw, struct net_device *dev, flow_block_cb_remove(block_cb, f); list_del(&block_cb->driver_list); kfree(block_cb->cb_priv); + block_cb->cb_priv = NULL; } return 0; default: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 5fd82c67b6ab7f9e827c936408d66cc93c5db51e..d9e241423bc5673299d13175d27197cb1117700c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -130,7 +130,7 @@ struct page_pool; #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x2 #define MLX5E_DEFAULT_LRO_TIMEOUT 32 -#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 +#define MLX5E_DEFAULT_SHAMPO_TIMEOUT 1024 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC 0x10 #define MLX5E_PARAMS_DEFAULT_RX_CQ_MODERATION_USEC_FROM_CQE 0x3 @@ -998,6 +998,7 @@ void mlx5e_build_ptys2ethtool_map(void); bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev, u8 page_shift, enum mlx5e_mpwrq_umr_mode umr_mode); +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len); void mlx5e_shampo_dealloc_hd(struct mlx5e_rq *rq); void mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 6c9ccccca81e27dc51829ed05d1ca3259a9baa92..64b62ed17b07a7383045779b4bb017f4bc642670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -928,7 +928,7 @@ int mlx5e_build_rq_param(struct mlx5_core_dev *mdev, MLX5_SET(wq, wq, log_headers_entry_size, mlx5e_shampo_get_log_hd_entry_size(mdev, params)); MLX5_SET(rqc, rqc, reservation_timeout, - params->packet_merge.timeout); + mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_SHAMPO_TIMEOUT)); MLX5_SET(rqc, rqc, shampo_match_criteria_type, params->packet_merge.shampo.match_criteria_type); MLX5_SET(rqc, rqc, shampo_no_match_alignment_granularity, @@ -1087,6 +1087,20 @@ static u32 mlx5e_shampo_icosq_sz(struct mlx5_core_dev *mdev, return wqebbs; } +#define MLX5E_LRO_TIMEOUT_ARR_SIZE 4 + +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) +{ + int i; + + /* The supported periods are organized in ascending order */ + for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) + if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) + break; + + return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); +} + static u32 mlx5e_mpwrq_total_umr_wqebbs(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 749b2ec0436eeadc0f48e2c0e813eeaa8f403ed0..3f8986f9d86291e6744315ddc8d15d0568e008a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -108,6 +108,7 @@ u32 mlx5e_shampo_hd_per_wqe(struct mlx5_core_dev *mdev, u32 mlx5e_shampo_hd_per_wq(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rq_param); +u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout); u8 mlx5e_mpwqe_get_log_stride_size(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_xsk_param *xsk); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 22918b2ef7f128849be838063819ed12509abb45..09433b91be176fb639a2b54cf3fb9595621c168b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -146,7 +146,9 @@ static int mlx5e_tx_reporter_timeout_recover(void *ctx) return err; } + mutex_lock(&priv->state_lock); err = mlx5e_safe_reopen_channels(priv); + mutex_unlock(&priv->state_lock); if (!err) { to_ctx->status = 1; /* all channels recovered */ return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c index 8cf8ba2622f2bfaa03b41cc9af0357bf8050e71e..71a168746ebe21d496612cf7f239671cd0d4cf20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c @@ -932,6 +932,7 @@ err_rule: mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, mh); mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id); err_mod_hdr: + *attr = *old_attr; kfree(old_attr); err_attr: kvfree(spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c index 6e00afe4671b78ae48eb3eaaddf8456eb09ab0e4..797db853de3638d95f19cf550998d6af477174fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_offload.c @@ -51,9 +51,10 @@ u32 mlx5_ipsec_device_caps(struct mlx5_core_dev *mdev) MLX5_CAP_FLOWTABLE_NIC_RX(mdev, decap)) caps |= MLX5_IPSEC_CAP_PACKET_OFFLOAD; - if ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && - MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || - MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) + if (IS_ENABLED(CONFIG_MLX5_CLS_ACT) && + ((MLX5_CAP_FLOWTABLE_NIC_TX(mdev, ignore_flow_level) && + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, ignore_flow_level)) || + MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))) caps |= MLX5_IPSEC_CAP_PRIO; if (MLX5_CAP_FLOWTABLE_NIC_TX(mdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 00d5661dc62e1a60388ce7077fa44e9ce5481678..36845872ae94e7e01e325546b9b6b192c99e3f0d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1409,7 +1409,12 @@ static int mlx5e_ethtool_set_link_ksettings(struct mlx5e_priv *priv, if (!an_changes && link_modes == eproto.admin) goto out; - mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + err = mlx5_port_set_eth_ptys(mdev, an_disable, link_modes, ext); + if (err) { + netdev_err(priv->netdev, "%s: failed to set ptys reg: %d\n", __func__, err); + goto out; + } + mlx5_toggle_port_link(mdev); out: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c index 3eccdadc0357810e18d93ba4d1104b99b1ba965d..773624bb2c5d549f30157ff158be7e909384585c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c @@ -734,7 +734,7 @@ mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv, if (num_tuples <= 0) { netdev_warn(priv->netdev, "%s: flow is not valid %d\n", __func__, num_tuples); - return num_tuples; + return num_tuples < 0 ? num_tuples : -EINVAL; } eth_ft = get_flow_table(priv, fs, num_tuples); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 6f686fabed4462801ae25200865848323893aa4c..16b67c457b605739ede6dd9b7423ca48d05cfc1e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1236,6 +1236,14 @@ void mlx5e_free_rx_missing_descs(struct mlx5e_rq *rq) rq->mpwqe.actual_wq_head = wq->head; rq->mpwqe.umr_in_progress = 0; rq->mpwqe.umr_completed = 0; + + if (test_bit(MLX5E_RQ_STATE_SHAMPO, &rq->state)) { + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + u16 len; + + len = (shampo->pi - shampo->ci) & shampo->hd_per_wq; + mlx5e_shampo_fill_umr(rq, len); + } } void mlx5e_free_rx_descs(struct mlx5e_rq *rq) @@ -3020,15 +3028,18 @@ int mlx5e_update_tx_netdev_queues(struct mlx5e_priv *priv) static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv, struct mlx5e_params *params) { - struct mlx5_core_dev *mdev = priv->mdev; - int num_comp_vectors, ix, irq; - - num_comp_vectors = mlx5_comp_vectors_max(mdev); + int ix; for (ix = 0; ix < params->num_channels; ix++) { + int num_comp_vectors, irq, vec_ix; + struct mlx5_core_dev *mdev; + + mdev = mlx5_sd_ch_ix_get_dev(priv->mdev, ix); + num_comp_vectors = mlx5_comp_vectors_max(mdev); cpumask_clear(priv->scratchpad.cpumask); + vec_ix = mlx5_sd_ch_ix_get_vec_ix(mdev, ix); - for (irq = ix; irq < num_comp_vectors; irq += params->num_channels) { + for (irq = vec_ix; irq < num_comp_vectors; irq += params->num_channels) { int cpu = mlx5_comp_vector_get_cpu(mdev, irq); cpumask_set_cpu(cpu, priv->scratchpad.cpumask); @@ -5167,18 +5178,6 @@ const struct net_device_ops mlx5e_netdev_ops = { #endif }; -static u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout) -{ - int i; - - /* The supported periods are organized in ascending order */ - for (i = 0; i < MLX5E_LRO_TIMEOUT_ARR_SIZE - 1; i++) - if (MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]) >= wanted_timeout) - break; - - return MLX5_CAP_ETH(mdev, lro_timer_supported_periods[i]); -} - void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 mtu) { struct mlx5e_params *params = &priv->channels.params; @@ -5308,7 +5307,7 @@ static void mlx5e_get_queue_stats_rx(struct net_device *dev, int i, struct mlx5e_rq_stats *rq_stats; ASSERT_RTNL(); - if (mlx5e_is_uplink_rep(priv)) + if (mlx5e_is_uplink_rep(priv) || !priv->stats_nch) return; channel_stats = priv->channel_stats[i]; @@ -5328,6 +5327,9 @@ static void mlx5e_get_queue_stats_tx(struct net_device *dev, int i, struct mlx5e_sq_stats *sq_stats; ASSERT_RTNL(); + if (!priv->stats_nch) + return; + /* no special case needed for ptp htb etc since txq2sq_stats is kept up * to date for active sq_stats, otherwise get_base_stats takes care of * inactive sqs. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 225da8d691fcf25a236bcd7c98fc351f86c2d566..de9d01036c2807afea326a90955aa3f59d039cbb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -735,6 +735,7 @@ static int mlx5e_alloc_rx_hd_mpwqe(struct mlx5e_rq *rq) ksm_entries = bitmap_find_window(shampo->bitmap, shampo->hd_per_wqe, shampo->hd_per_wq, shampo->pi); + ksm_entries = ALIGN_DOWN(ksm_entries, MLX5E_SHAMPO_WQ_HEADER_PER_PAGE); if (!ksm_entries) return 0; @@ -962,26 +963,31 @@ void mlx5e_free_icosq_descs(struct mlx5e_icosq *sq) sq->cc = sqcc; } -static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, - struct mlx5e_icosq *sq) +void mlx5e_shampo_fill_umr(struct mlx5e_rq *rq, int len) { - struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); - struct mlx5e_shampo_hd *shampo; - /* assume 1:1 relationship between RQ and icosq */ - struct mlx5e_rq *rq = &c->rq; - int end, from, len = umr.len; + struct mlx5e_shampo_hd *shampo = rq->mpwqe.shampo; + int end, from, full_len = len; - shampo = rq->mpwqe.shampo; end = shampo->hd_per_wq; from = shampo->ci; - if (from + len > shampo->hd_per_wq) { + if (from + len > end) { len -= end - from; bitmap_set(shampo->bitmap, from, end - from); from = 0; } bitmap_set(shampo->bitmap, from, len); - shampo->ci = (shampo->ci + umr.len) & (shampo->hd_per_wq - 1); + shampo->ci = (shampo->ci + full_len) & (shampo->hd_per_wq - 1); +} + +static void mlx5e_handle_shampo_hd_umr(struct mlx5e_shampo_umr umr, + struct mlx5e_icosq *sq) +{ + struct mlx5e_channel *c = container_of(sq, struct mlx5e_channel, icosq); + /* assume 1:1 relationship between RQ and icosq */ + struct mlx5e_rq *rq = &c->rq; + + mlx5e_shampo_fill_umr(rq, umr.len); } int mlx5e_poll_ico_cq(struct mlx5e_cq *cq) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 979c49ae6b5cc987eeac168adfc03f2278a0d91e..b43ca0b762c306112e8ead14a3e7cda8dcdf0738 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -207,6 +207,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + struct devlink *devlink = priv_to_devlink(dev); /* if this is the driver that initiated the fw reset, devlink completed the reload */ if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { @@ -218,9 +219,11 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else mlx5_load_one(dev, true); - devlink_remote_reload_actions_performed(priv_to_devlink(dev), 0, + devl_lock(devlink); + devlink_remote_reload_actions_performed(devlink, 0, BIT(DEVLINK_RELOAD_ACTION_DRIVER_REINIT) | BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE)); + devl_unlock(devlink); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index f7b01b3f0cba360449533c03c148e2bbc3d53e0d..1477db7f5307e095ca1025417598c444215893d1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -48,6 +48,7 @@ static struct mlx5_irq * irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_desc) { struct irq_affinity_desc auto_desc = {}; + struct mlx5_irq *irq; u32 irq_index; int err; @@ -64,9 +65,12 @@ irq_pool_request_irq(struct mlx5_irq_pool *pool, struct irq_affinity_desc *af_de else cpu_get(pool, cpumask_first(&af_desc->mask)); } - return mlx5_irq_alloc(pool, irq_index, - cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, - NULL); + irq = mlx5_irq_alloc(pool, irq_index, + cpumask_empty(&auto_desc.mask) ? af_desc : &auto_desc, + NULL); + if (IS_ERR(irq)) + xa_erase(&pool->irqs, irq_index); + return irq; } /* Looking for the IRQ with the smallest refcount that fits req_mask. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index d0871c46b8c54376154cdf36e90c13636b7582ef..cf8045b926892e2d9d1a1e01463539243a8d6ea6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -1538,7 +1538,7 @@ u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, goto unlock; for (i = 0; i < ldev->ports; i++) { - if (ldev->pf[MLX5_LAG_P1].netdev == slave) { + if (ldev->pf[i].netdev == slave) { port = i; break; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c index 234cd00f71a1cb7feacf94caca9c0342be3878b6..b7d4b1a2baf2ec22892c705aef740e94eed07d16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -386,7 +386,8 @@ static int ipsec_fs_roce_tx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } @@ -455,7 +456,8 @@ static int ipsec_fs_roce_rx_mpv_create(struct mlx5_core_dev *mdev, return -EOPNOTSUPP; peer_priv = mlx5_devcom_get_next_peer_data(*ipsec_roce->devcom, &tmp); - if (!peer_priv) { + if (!peer_priv || !peer_priv->ipsec) { + mlx5_core_err(mdev, "IPsec not supported on master device\n"); err = -EOPNOTSUPP; goto release_peer; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index f6deb5a3f82024e528436af8fd66d238518a6588..eeb0b7ea05f1260b4177e45d38e76da367897307 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -126,7 +126,7 @@ static bool mlx5_sd_is_supported(struct mlx5_core_dev *dev, u8 host_buses) } static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, - u8 *host_buses, u8 *sd_group) + u8 *host_buses) { u32 out[MLX5_ST_SZ_DW(mpir_reg)]; int err; @@ -135,10 +135,6 @@ static int mlx5_query_sd(struct mlx5_core_dev *dev, bool *sdm, if (err) return err; - err = mlx5_query_nic_vport_sd_group(dev, sd_group); - if (err) - return err; - *sdm = MLX5_GET(mpir_reg, out, sdm); *host_buses = MLX5_GET(mpir_reg, out, host_buses); @@ -166,19 +162,23 @@ static int sd_init(struct mlx5_core_dev *dev) if (mlx5_core_is_ecpf(dev)) return 0; + err = mlx5_query_nic_vport_sd_group(dev, &sd_group); + if (err) + return err; + + if (!sd_group) + return 0; + if (!MLX5_CAP_MCAM_REG(dev, mpir)) return 0; - err = mlx5_query_sd(dev, &sdm, &host_buses, &sd_group); + err = mlx5_query_sd(dev, &sdm, &host_buses); if (err) return err; if (!sdm) return 0; - if (!sd_group) - return 0; - group_id = mlx5_sd_group_id(dev, sd_group); if (!mlx5_sd_is_supported(dev, host_buses)) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 527da58c79535b5895d5c2045409279174c2d506..5b7e6f4b5c7ea10efd4b47eb621726482e83bb72 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -2142,7 +2142,6 @@ static int mlx5_try_fast_unload(struct mlx5_core_dev *dev) /* Panic tear down fw command will stop the PCI bus communication * with the HCA, so the health poll is no longer needed. */ - mlx5_drain_health_wq(dev); mlx5_stop_health_poll(dev, false); ret = mlx5_cmd_fast_teardown_hca(dev); @@ -2177,6 +2176,7 @@ static void shutdown(struct pci_dev *pdev) mlx5_core_info(dev, "Shutdown was called\n"); set_bit(MLX5_BREAK_FW_WAIT, &dev->intf_state); + mlx5_drain_health_wq(dev); err = mlx5_try_fast_unload(dev); if (err) mlx5_unload_one(dev, false); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index b2986175d9afe80abb0e109fcfe56590f14b0cf7..b706f1486504a7c90049d9071c6541a7016067de 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -112,6 +112,7 @@ static void mlx5_sf_dev_shutdown(struct auxiliary_device *adev) struct mlx5_core_dev *mdev = sf_dev->mdev; set_bit(MLX5_BREAK_FW_WAIT, &mdev->intf_state); + mlx5_drain_health_wq(mdev); mlx5_unload_one(mdev, false); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c index 042ca0349124332ab875d11b9c8112b9b1173de4..d1db04baa1fa6f01a297879dd383aa42ace22a6e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c @@ -7,7 +7,7 @@ /* don't try to optimize STE allocation if the stack is too constaraining */ #define DR_RULE_MAX_STES_OPTIMIZED 0 #else -#define DR_RULE_MAX_STES_OPTIMIZED 5 +#define DR_RULE_MAX_STES_OPTIMIZED 2 #endif #define DR_RULE_MAX_STE_CHAIN_OPTIMIZED (DR_RULE_MAX_STES_OPTIMIZED + DR_ACTION_MAX_STES) diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h index bc94e75a7aebd1736496fef7fc7ade7302ab1388..e7777700ee18a7e16d1bdcf9a61747e4e5b7f839 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige.h @@ -40,6 +40,7 @@ */ #define MLXBF_GIGE_BCAST_MAC_FILTER_IDX 0 #define MLXBF_GIGE_LOCAL_MAC_FILTER_IDX 1 +#define MLXBF_GIGE_MAX_FILTER_IDX 3 /* Define for broadcast MAC literal */ #define BCAST_MAC_ADDR 0xFFFFFFFFFFFF @@ -175,6 +176,13 @@ enum mlxbf_gige_res { int mlxbf_gige_mdio_probe(struct platform_device *pdev, struct mlxbf_gige *priv); void mlxbf_gige_mdio_remove(struct mlxbf_gige *priv); + +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv); +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index); void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 dmac); void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c index b157f0f1c5a8864e950d148b3b35ce9d47584b24..385a56ac73481ad28b5873b262241dcd0068241d 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_main.c @@ -168,6 +168,10 @@ static int mlxbf_gige_open(struct net_device *netdev) if (err) goto napi_deinit; + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_BCAST_MAC_FILTER_IDX); + mlxbf_gige_enable_mac_rx_filter(priv, MLXBF_GIGE_LOCAL_MAC_FILTER_IDX); + mlxbf_gige_enable_multicast_rx(priv); + /* Set bits in INT_EN that we care about */ int_en = MLXBF_GIGE_INT_EN_HW_ACCESS_ERROR | MLXBF_GIGE_INT_EN_TX_CHECKSUM_INPUTS | @@ -379,6 +383,7 @@ static int mlxbf_gige_probe(struct platform_device *pdev) void __iomem *plu_base; void __iomem *base; int addr, phy_irq; + unsigned int i; int err; base = devm_platform_ioremap_resource(pdev, MLXBF_GIGE_RES_MAC); @@ -423,6 +428,11 @@ static int mlxbf_gige_probe(struct platform_device *pdev) priv->rx_q_entries = MLXBF_GIGE_DEFAULT_RXQ_SZ; priv->tx_q_entries = MLXBF_GIGE_DEFAULT_TXQ_SZ; + for (i = 0; i <= MLXBF_GIGE_MAX_FILTER_IDX; i++) + mlxbf_gige_disable_mac_rx_filter(priv, i); + mlxbf_gige_disable_multicast_rx(priv); + mlxbf_gige_disable_promisc(priv); + /* Write initial MAC address to hardware */ mlxbf_gige_initial_mac(priv); diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h index 98a8681c21b9ca8dedf0884435608de55b5feda7..4d14cb13fd64e1fe6970c6dcd6d747a36a1275cc 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_regs.h @@ -62,6 +62,8 @@ #define MLXBF_GIGE_TX_STATUS_DATA_FIFO_FULL BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_START 0x0520 #define MLXBF_GIGE_RX_MAC_FILTER_DMAC_RANGE_END 0x0528 +#define MLXBF_GIGE_RX_MAC_FILTER_GENERAL 0x0530 +#define MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST BIT(1) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC 0x0540 #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_DISC_EN BIT(0) #define MLXBF_GIGE_RX_MAC_FILTER_COUNT_PASS 0x0548 diff --git a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c index 6999843584934808dfc376c22e59274c0b0f15b8..eb62620b63c7fc6fcae084444873efcb666b304f 100644 --- a/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c +++ b/drivers/net/ethernet/mellanox/mlxbf_gige/mlxbf_gige_rx.c @@ -11,15 +11,31 @@ #include "mlxbf_gige.h" #include "mlxbf_gige_regs.h" -void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, - unsigned int index, u64 dmac) +void mlxbf_gige_enable_multicast_rx(struct mlxbf_gige *priv) { void __iomem *base = priv->base; - u64 control; + u64 data; - /* Write destination MAC to specified MAC RX filter */ - writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + - (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data |= MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_disable_multicast_rx(struct mlxbf_gige *priv) +{ + void __iomem *base = priv->base; + u64 data; + + data = readq(base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); + data &= ~MLXBF_GIGE_RX_MAC_FILTER_EN_MULTICAST; + writeq(data, base + MLXBF_GIGE_RX_MAC_FILTER_GENERAL); +} + +void mlxbf_gige_enable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; /* Enable MAC receive filter mask for specified index */ control = readq(base + MLXBF_GIGE_CONTROL); @@ -27,6 +43,28 @@ void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, writeq(control, base + MLXBF_GIGE_CONTROL); } +void mlxbf_gige_disable_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index) +{ + void __iomem *base = priv->base; + u64 control; + + /* Disable MAC receive filter mask for specified index */ + control = readq(base + MLXBF_GIGE_CONTROL); + control &= ~(MLXBF_GIGE_CONTROL_EN_SPECIFIC_MAC << index); + writeq(control, base + MLXBF_GIGE_CONTROL); +} + +void mlxbf_gige_set_mac_rx_filter(struct mlxbf_gige *priv, + unsigned int index, u64 dmac) +{ + void __iomem *base = priv->base; + + /* Write destination MAC to specified MAC RX filter */ + writeq(dmac, base + MLXBF_GIGE_RX_MAC_FILTER + + (index * MLXBF_GIGE_RX_MAC_FILTER_STRIDE)); +} + void mlxbf_gige_get_mac_rx_filter(struct mlxbf_gige *priv, unsigned int index, u64 *dmac) { diff --git a/drivers/net/ethernet/meta/Kconfig b/drivers/net/ethernet/meta/Kconfig index 86034ea4ba5b6128284787833243603fc5587973..c002ede3640202bb4cb2d0628d6b94701b6a9bd2 100644 --- a/drivers/net/ethernet/meta/Kconfig +++ b/drivers/net/ethernet/meta/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_META config FBNIC tristate "Meta Platforms Host Network Interface" depends on X86_64 || COMPILE_TEST - depends on S390=n + depends on !S390 depends on MAX_SKB_FRAGS < 22 depends on PCI_MSI select PHYLINK diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index d2f07e179e86ba5f70e2a844093482fb899f6267..39f56973746d7f70bc5f666f2e4678398fa5cb8b 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -599,7 +599,11 @@ static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, else *headroom = XDP_PACKET_HEADROOM; - *alloc_size = mtu + MANA_RXBUF_PAD + *headroom; + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + + /* Using page pool in this case, so alloc_size is PAGE_SIZE */ + if (*alloc_size < PAGE_SIZE) + *alloc_size = PAGE_SIZE; *datasize = mtu + ETH_HLEN; } @@ -1788,7 +1792,6 @@ static void mana_poll_rx_cq(struct mana_cq *cq) static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) { struct mana_cq *cq = context; - u8 arm_bit; int w; WARN_ON_ONCE(cq->gdma_cq != gdma_queue); @@ -1799,16 +1802,23 @@ static int mana_cq_handler(void *context, struct gdma_queue *gdma_queue) mana_poll_tx_cq(cq); w = cq->work_done; - - if (w < cq->budget && - napi_complete_done(&cq->napi, w)) { - arm_bit = SET_ARM_BIT; - } else { - arm_bit = 0; + cq->work_done_since_doorbell += w; + + if (w < cq->budget) { + mana_gd_ring_cq(gdma_queue, SET_ARM_BIT); + cq->work_done_since_doorbell = 0; + napi_complete_done(&cq->napi, w); + } else if (cq->work_done_since_doorbell > + cq->gdma_cq->queue_size / COMP_ENTRY_SIZE * 4) { + /* MANA hardware requires at least one doorbell ring every 8 + * wraparounds of CQ even if there is no need to arm the CQ. + * This driver rings the doorbell as soon as we have exceeded + * 4 wraparounds. + */ + mana_gd_ring_cq(gdma_queue, 0); + cq->work_done_since_doorbell = 0; } - mana_gd_ring_cq(gdma_queue, arm_bit); - return w; } diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index ed2fb44500b0cb1a0fbfdae04c2568c4fa4fef8c..3d72aa7b1305039cc3bec709e61c81041d59f7b4 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -453,9 +453,158 @@ static u16 ocelot_vlan_unaware_pvid(struct ocelot *ocelot, return VLAN_N_VID - bridge_num - 1; } +/** + * ocelot_update_vlan_reclassify_rule() - Make switch aware only to bridge VLAN TPID + * + * @ocelot: Switch private data structure + * @port: Index of ingress port + * + * IEEE 802.1Q-2018 clauses "5.5 C-VLAN component conformance" and "5.6 S-VLAN + * component conformance" suggest that a C-VLAN component should only recognize + * and filter on C-Tags, and an S-VLAN component should only recognize and + * process based on C-Tags. + * + * In Linux, as per commit 1a0b20b25732 ("Merge branch 'bridge-next'"), C-VLAN + * components are largely represented by a bridge with vlan_protocol 802.1Q, + * and S-VLAN components by a bridge with vlan_protocol 802.1ad. + * + * Currently the driver only offloads vlan_protocol 802.1Q, but the hardware + * design is non-conformant, because the switch assigns each frame to a VLAN + * based on an entirely different question, as detailed in figure "Basic VLAN + * Classification Flow" from its manual and reproduced below. + * + * Set TAG_TYPE, PCP, DEI, VID to port-default values in VLAN_CFG register + * if VLAN_AWARE_ENA[port] and frame has outer tag then: + * if VLAN_INNER_TAG_ENA[port] and frame has inner tag then: + * TAG_TYPE = (Frame.InnerTPID <> 0x8100) + * Set PCP, DEI, VID to values from inner VLAN header + * else: + * TAG_TYPE = (Frame.OuterTPID <> 0x8100) + * Set PCP, DEI, VID to values from outer VLAN header + * if VID == 0 then: + * VID = VLAN_CFG.VLAN_VID + * + * Summarized, the switch will recognize both 802.1Q and 802.1ad TPIDs as VLAN + * "with equal rights", and just set the TAG_TYPE bit to 0 (if 802.1Q) or to 1 + * (if 802.1ad). It will classify based on whichever of the tags is "outer", no + * matter what TPID that may have (or "inner", if VLAN_INNER_TAG_ENA[port]). + * + * In the VLAN Table, the TAG_TYPE information is not accessible - just the + * classified VID is - so it is as if each VLAN Table entry is for 2 VLANs: + * C-VLAN X, and S-VLAN X. + * + * Whereas the Linux bridge behavior is to only filter on frames with a TPID + * equal to the vlan_protocol, and treat everything else as VLAN-untagged. + * + * Consider an ingress packet tagged with 802.1ad VID=3 and 802.1Q VID=5, + * received on a bridge vlan_filtering=1 vlan_protocol=802.1Q port. This frame + * should be treated as 802.1Q-untagged, and classified to the PVID of that + * bridge port. Not to VID=3, and not to VID=5. + * + * The VCAP IS1 TCAM has everything we need to overwrite the choices made in + * the basic VLAN classification pipeline: it can match on TAG_TYPE in the key, + * and it can modify the classified VID in the action. Thus, for each port + * under a vlan_filtering bridge, we can insert a rule in VCAP IS1 lookup 0 to + * match on 802.1ad tagged frames and modify their classified VID to the 802.1Q + * PVID of the port. This effectively makes it appear to the outside world as + * if those packets were processed as VLAN-untagged. + * + * The rule needs to be updated each time the bridge PVID changes, and needs + * to be deleted if the bridge PVID is deleted, or if the port becomes + * VLAN-unaware. + */ +static int ocelot_update_vlan_reclassify_rule(struct ocelot *ocelot, int port) +{ + unsigned long cookie = OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port); + struct ocelot_vcap_block *block_vcap_is1 = &ocelot->block[VCAP_IS1]; + struct ocelot_port *ocelot_port = ocelot->ports[port]; + const struct ocelot_bridge_vlan *pvid_vlan; + struct ocelot_vcap_filter *filter; + int err, val, pcp, dei; + bool vid_replace_ena; + u16 vid; + + pvid_vlan = ocelot_port->pvid_vlan; + vid_replace_ena = ocelot_port->vlan_aware && pvid_vlan; + + filter = ocelot_vcap_block_find_filter_by_id(block_vcap_is1, cookie, + false); + if (!vid_replace_ena) { + /* If the reclassification filter doesn't need to exist, delete + * it if it was previously installed, and exit doing nothing + * otherwise. + */ + if (filter) + return ocelot_vcap_filter_del(ocelot, filter); + + return 0; + } + + /* The reclassification rule must apply. See if it already exists + * or if it must be created. + */ + + /* Treating as VLAN-untagged means using as classified VID equal to + * the bridge PVID, and PCP/DEI set to the port default QoS values. + */ + vid = pvid_vlan->vid; + val = ocelot_read_gix(ocelot, ANA_PORT_QOS_CFG, port); + pcp = ANA_PORT_QOS_CFG_QOS_DEFAULT_VAL_X(val); + dei = !!(val & ANA_PORT_QOS_CFG_DP_DEFAULT_VAL); + + if (filter) { + bool changed = false; + + /* Filter exists, just update it */ + if (filter->action.vid != vid) { + filter->action.vid = vid; + changed = true; + } + if (filter->action.pcp != pcp) { + filter->action.pcp = pcp; + changed = true; + } + if (filter->action.dei != dei) { + filter->action.dei = dei; + changed = true; + } + + if (!changed) + return 0; + + return ocelot_vcap_filter_replace(ocelot, filter); + } + + /* Filter doesn't exist, create it */ + filter = kzalloc(sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + filter->key_type = OCELOT_VCAP_KEY_ANY; + filter->ingress_port_mask = BIT(port); + filter->vlan.tpid = OCELOT_VCAP_BIT_1; + filter->prio = 1; + filter->id.cookie = cookie; + filter->id.tc_offload = false; + filter->block_id = VCAP_IS1; + filter->type = OCELOT_VCAP_FILTER_OFFLOAD; + filter->lookup = 0; + filter->action.vid_replace_ena = true; + filter->action.pcp_dei_ena = true; + filter->action.vid = vid; + filter->action.pcp = pcp; + filter->action.dei = dei; + + err = ocelot_vcap_filter_add(ocelot, filter, NULL); + if (err) + kfree(filter); + + return err; +} + /* Default vlan to clasify for untagged frames (may be zero) */ -static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, - const struct ocelot_bridge_vlan *pvid_vlan) +static int ocelot_port_set_pvid(struct ocelot *ocelot, int port, + const struct ocelot_bridge_vlan *pvid_vlan) { struct ocelot_port *ocelot_port = ocelot->ports[port]; u16 pvid = ocelot_vlan_unaware_pvid(ocelot, ocelot_port->bridge); @@ -475,15 +624,23 @@ static void ocelot_port_set_pvid(struct ocelot *ocelot, int port, * happens automatically), but also 802.1p traffic which gets * classified to VLAN 0, but that is always in our RX filter, so it * would get accepted were it not for this setting. + * + * Also, we only support the bridge 802.1Q VLAN protocol, so + * 802.1ad-tagged frames (carrying S-Tags) should be considered + * 802.1Q-untagged, and also dropped. */ if (!pvid_vlan && ocelot_port->vlan_aware) val = ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA; + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA; ocelot_rmw_gix(ocelot, val, ANA_PORT_DROP_CFG_DROP_PRIO_S_TAGGED_ENA | - ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA, + ANA_PORT_DROP_CFG_DROP_PRIO_C_TAGGED_ENA | + ANA_PORT_DROP_CFG_DROP_S_TAGGED_ENA, ANA_PORT_DROP_CFG, port); + + return ocelot_update_vlan_reclassify_rule(ocelot, port); } static struct ocelot_bridge_vlan *ocelot_bridge_vlan_find(struct ocelot *ocelot, @@ -631,7 +788,10 @@ int ocelot_port_vlan_filtering(struct ocelot *ocelot, int port, ANA_PORT_VLAN_CFG_VLAN_POP_CNT_M, ANA_PORT_VLAN_CFG, port); - ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + err = ocelot_port_set_pvid(ocelot, port, ocelot_port->pvid_vlan); + if (err) + return err; + ocelot_port_manage_port_tag(ocelot, port); return 0; @@ -684,9 +844,12 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid, return err; /* Default ingress vlan classification */ - if (pvid) - ocelot_port_set_pvid(ocelot, port, - ocelot_bridge_vlan_find(ocelot, vid)); + if (pvid) { + err = ocelot_port_set_pvid(ocelot, port, + ocelot_bridge_vlan_find(ocelot, vid)); + if (err) + return err; + } /* Untagged egress vlan clasification */ ocelot_port_manage_port_tag(ocelot, port); @@ -712,8 +875,11 @@ int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid) return err; /* Ingress */ - if (del_pvid) - ocelot_port_set_pvid(ocelot, port, NULL); + if (del_pvid) { + err = ocelot_port_set_pvid(ocelot, port, NULL); + if (err) + return err; + } /* Egress */ ocelot_port_manage_port_tag(ocelot, port); @@ -1099,6 +1265,48 @@ void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, } EXPORT_SYMBOL(ocelot_ptp_rx_timestamp); +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_inj_grp); + +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_inj_grp); + +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp) + __acquires(&ocelot->inj_lock) +{ + spin_lock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp); + +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp) + __releases(&ocelot->inj_lock) +{ + spin_unlock(&ocelot->inj_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp); + +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __acquires(&ocelot->xtr_lock) +{ + spin_lock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_lock_xtr_grp_bh); + +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp) + __releases(&ocelot->xtr_lock) +{ + spin_unlock_bh(&ocelot->xtr_lock); +} +EXPORT_SYMBOL_GPL(ocelot_unlock_xtr_grp_bh); + int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) { u64 timestamp, src_port, len; @@ -1109,6 +1317,8 @@ int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **nskb) u32 val, *buf; int err; + lockdep_assert_held(&ocelot->xtr_lock); + err = ocelot_xtr_poll_xfh(ocelot, grp, xfh); if (err) return err; @@ -1184,6 +1394,8 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) { u32 val = ocelot_read(ocelot, QS_INJ_STATUS); + lockdep_assert_held(&ocelot->inj_lock); + if (!(val & QS_INJ_STATUS_FIFO_RDY(BIT(grp)))) return false; if (val & QS_INJ_STATUS_WMARK_REACHED(BIT(grp))) @@ -1193,28 +1405,55 @@ bool ocelot_can_inject(struct ocelot *ocelot, int grp) } EXPORT_SYMBOL(ocelot_can_inject); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag) +/** + * ocelot_ifh_set_basic - Set basic information in Injection Frame Header + * @ifh: Pointer to Injection Frame Header memory + * @ocelot: Switch private data structure + * @port: Egress port number + * @rew_op: Egress rewriter operation for PTP + * @skb: Pointer to socket buffer (packet) + * + * Populate the Injection Frame Header with basic information for this skb: the + * analyzer bypass bit, destination port, VLAN info, egress rewriter info. + */ +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb) { + struct ocelot_port *ocelot_port = ocelot->ports[port]; + struct net_device *dev = skb->dev; + u64 vlan_tci, tag_type; + int qos_class; + + ocelot_xmit_get_vlan_info(skb, ocelot_port->bridge, &vlan_tci, + &tag_type); + + qos_class = netdev_get_num_tc(dev) ? + netdev_get_prio_tc_map(dev, skb->priority) : skb->priority; + + memset(ifh, 0, OCELOT_TAG_LEN); ocelot_ifh_set_bypass(ifh, 1); + ocelot_ifh_set_src(ifh, BIT_ULL(ocelot->num_phys_ports)); ocelot_ifh_set_dest(ifh, BIT_ULL(port)); - ocelot_ifh_set_tag_type(ifh, IFH_TAG_TYPE_C); - if (vlan_tag) - ocelot_ifh_set_vlan_tci(ifh, vlan_tag); + ocelot_ifh_set_qos_class(ifh, qos_class); + ocelot_ifh_set_tag_type(ifh, tag_type); + ocelot_ifh_set_vlan_tci(ifh, vlan_tci); if (rew_op) ocelot_ifh_set_rew_op(ifh, rew_op); } -EXPORT_SYMBOL(ocelot_ifh_port_set); +EXPORT_SYMBOL(ocelot_ifh_set_basic); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb) { - u32 ifh[OCELOT_TAG_LEN / 4] = {0}; + u32 ifh[OCELOT_TAG_LEN / 4]; unsigned int i, count, last; + lockdep_assert_held(&ocelot->inj_lock); + ocelot_write_rix(ocelot, QS_INJ_CTRL_GAP_SIZE(1) | QS_INJ_CTRL_SOF, QS_INJ_CTRL, grp); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); for (i = 0; i < OCELOT_TAG_LEN / 4; i++) ocelot_write_rix(ocelot, ifh[i], QS_INJ_WR, grp); @@ -1247,6 +1486,8 @@ EXPORT_SYMBOL(ocelot_port_inject_frame); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp) { + lockdep_assert_held(&ocelot->xtr_lock); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) ocelot_read_rix(ocelot, QS_XTR_RD, grp); } @@ -2532,7 +2773,7 @@ int ocelot_port_set_default_prio(struct ocelot *ocelot, int port, u8 prio) ANA_PORT_QOS_CFG, port); - return 0; + return ocelot_update_vlan_reclassify_rule(ocelot, port); } EXPORT_SYMBOL_GPL(ocelot_port_set_default_prio); @@ -2929,6 +3170,8 @@ int ocelot_init(struct ocelot *ocelot) mutex_init(&ocelot->fwd_domain_lock); spin_lock_init(&ocelot->ptp_clock_lock); spin_lock_init(&ocelot->ts_id_lock); + spin_lock_init(&ocelot->inj_lock); + spin_lock_init(&ocelot->xtr_lock); ocelot->owq = alloc_ordered_workqueue("ocelot-owq", 0); if (!ocelot->owq) diff --git a/drivers/net/ethernet/mscc/ocelot_fdma.c b/drivers/net/ethernet/mscc/ocelot_fdma.c index 312a468321544dc968a297a53ec8041359dd256a..00326ae8c708b0b6b395193ce101504172631c56 100644 --- a/drivers/net/ethernet/mscc/ocelot_fdma.c +++ b/drivers/net/ethernet/mscc/ocelot_fdma.c @@ -665,8 +665,7 @@ static int ocelot_fdma_prepare_skb(struct ocelot *ocelot, int port, u32 rew_op, ifh = skb_push(skb, OCELOT_TAG_LEN); skb_put(skb, ETH_FCS_LEN); - memset(ifh, 0, OCELOT_TAG_LEN); - ocelot_ifh_port_set(ifh, port, rew_op, skb_vlan_tag_get(skb)); + ocelot_ifh_set_basic(ifh, ocelot, port, rew_op, skb); return 0; } diff --git a/drivers/net/ethernet/mscc/ocelot_vcap.c b/drivers/net/ethernet/mscc/ocelot_vcap.c index 73cdec5ca6a34d00eef87ac1754f0e0354b4c567..5734b86aed5b53029c7345b63d54aaed76e099dc 100644 --- a/drivers/net/ethernet/mscc/ocelot_vcap.c +++ b/drivers/net/ethernet/mscc/ocelot_vcap.c @@ -695,6 +695,7 @@ static void is1_entry_set(struct ocelot *ocelot, int ix, vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_MC, filter->dmac_mc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_L2_BC, filter->dmac_bc); vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_VLAN_TAGGED, tag->tagged); + vcap_key_bit_set(vcap, &data, VCAP_IS1_HK_TPID, tag->tpid); vcap_key_set(vcap, &data, VCAP_IS1_HK_VID, tag->vid.value, tag->vid.mask); vcap_key_set(vcap, &data, VCAP_IS1_HK_PCP, diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index 993212c3a7da6f76139abc2273de216e4e515217..c09dd2e3343cbaeaa31f0c2016e3be4ab03b0fd3 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -51,6 +51,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg) struct ocelot *ocelot = arg; int grp = 0, err; + ocelot_lock_xtr_grp(ocelot, grp); + while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp)) { struct sk_buff *skb; @@ -69,6 +71,8 @@ out: if (err < 0) ocelot_drain_cpu_queue(ocelot, 0); + ocelot_unlock_xtr_grp(ocelot, grp); + return IRQ_HANDLED; } diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 714d2e80469404ef0e950998a8cbce85449fa8f2..3507c2e28110d23b896c41044708e605130a2f0f 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -4349,7 +4349,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb, if (unlikely(!rtl_tx_slots_avail(tp))) { if (net_ratelimit()) netdev_err(dev, "BUG! Tx Ring full when queue awake!\n"); - goto err_stop_0; + netif_stop_queue(dev); + return NETDEV_TX_BUSY; } opts[1] = rtl8169_tx_vlan_tag(skb); @@ -4405,11 +4406,6 @@ err_dma_0: dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; - -err_stop_0: - netif_stop_queue(dev); - dev->stats.tx_dropped++; - return NETDEV_TX_BUSY; } static unsigned int rtl_last_frag_len(struct sk_buff *skb) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index d3c5306f1c41f7aca6f6a9170f7946a3abfe6cd0..93a78fd0737b6c93c2bef9ed7ad87de50dcac52d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -573,8 +573,6 @@ static inline u32 mtl_low_credx_base_addr(const struct dwmac4_addrs *addrs, #define GMAC_PHYIF_CTRLSTATUS_LNKSTS BIT(19) #define GMAC_PHYIF_CTRLSTATUS_JABTO BIT(20) #define GMAC_PHYIF_CTRLSTATUS_FALSECARDET BIT(21) -/* LNKMOD */ -#define GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK 0x1 /* LNKSPEED */ #define GMAC_PHYIF_CTRLSTATUS_SPEED_125 0x2 #define GMAC_PHYIF_CTRLSTATUS_SPEED_25 0x1 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c index f98741d2607ec45b6558d3da817c3e10792386c8..31c387cc5f269dd604d1d810137c80ac53b0d63e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c @@ -786,7 +786,7 @@ static void dwmac4_phystatus(void __iomem *ioaddr, struct stmmac_extra_stats *x) else x->pcs_speed = SPEED_10; - x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD_MASK); + x->pcs_duplex = (status & GMAC_PHYIF_CTRLSTATUS_LNKMOD); pr_info("Link is Up - %d/%s\n", (int)x->pcs_speed, x->pcs_duplex ? "Full" : "Half"); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c index ec54b18c5fe73b942cdeb4c362168c06e7b16a28..a5e9b779c44d0100e16a3e380964cafeeaabc4a7 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_mdio.c @@ -124,8 +124,12 @@ static int ngbe_phylink_init(struct wx *wx) MAC_SYM_PAUSE | MAC_ASYM_PAUSE; config->mac_managed_pm = true; - phy_mode = PHY_INTERFACE_MODE_RGMII_ID; - __set_bit(PHY_INTERFACE_MODE_RGMII_ID, config->supported_interfaces); + /* The MAC only has add the Tx delay and it can not be modified. + * So just disable TX delay in PHY, and it is does not matter to + * internal phy. + */ + phy_mode = PHY_INTERFACE_MODE_RGMII_RXID; + __set_bit(PHY_INTERFACE_MODE_RGMII_RXID, config->supported_interfaces); phylink = phylink_create(config, NULL, phy_mode, &ngbe_mac_ops); if (IS_ERR(phylink)) diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet.h b/drivers/net/ethernet/xilinx/xilinx_axienet.h index fa5500decc9602c38ccf930cb6983a401f5b9c1f..09c9f9787180bf1dd9faedcdb337322c0211927b 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet.h +++ b/drivers/net/ethernet/xilinx/xilinx_axienet.h @@ -160,16 +160,17 @@ #define XAE_RCW1_OFFSET 0x00000404 /* Rx Configuration Word 1 */ #define XAE_TC_OFFSET 0x00000408 /* Tx Configuration */ #define XAE_FCC_OFFSET 0x0000040C /* Flow Control Configuration */ -#define XAE_EMMC_OFFSET 0x00000410 /* EMAC mode configuration */ -#define XAE_PHYC_OFFSET 0x00000414 /* RGMII/SGMII configuration */ +#define XAE_EMMC_OFFSET 0x00000410 /* MAC speed configuration */ +#define XAE_PHYC_OFFSET 0x00000414 /* RX Max Frame Configuration */ #define XAE_ID_OFFSET 0x000004F8 /* Identification register */ -#define XAE_MDIO_MC_OFFSET 0x00000500 /* MII Management Config */ -#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MII Management Control */ -#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MII Management Write Data */ -#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MII Management Read Data */ +#define XAE_MDIO_MC_OFFSET 0x00000500 /* MDIO Setup */ +#define XAE_MDIO_MCR_OFFSET 0x00000504 /* MDIO Control */ +#define XAE_MDIO_MWD_OFFSET 0x00000508 /* MDIO Write Data */ +#define XAE_MDIO_MRD_OFFSET 0x0000050C /* MDIO Read Data */ #define XAE_UAW0_OFFSET 0x00000700 /* Unicast address word 0 */ #define XAE_UAW1_OFFSET 0x00000704 /* Unicast address word 1 */ -#define XAE_FMI_OFFSET 0x00000708 /* Filter Mask Index */ +#define XAE_FMI_OFFSET 0x00000708 /* Frame Filter Control */ +#define XAE_FFE_OFFSET 0x0000070C /* Frame Filter Enable */ #define XAE_AF0_OFFSET 0x00000710 /* Address Filter 0 */ #define XAE_AF1_OFFSET 0x00000714 /* Address Filter 1 */ @@ -308,7 +309,7 @@ */ #define XAE_UAW1_UNICASTADDR_MASK 0x0000FFFF -/* Bit masks for Axi Ethernet FMI register */ +/* Bit masks for Axi Ethernet FMC register */ #define XAE_FMI_PM_MASK 0x80000000 /* Promis. mode enable */ #define XAE_FMI_IND_MASK 0x00000003 /* Index Mask */ diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index e342f387c3dd643cb2cb173524484cf2004547ae..9aeb7b9f3ae408e2b843fc6d24c47fb04648b708 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -432,7 +432,7 @@ static int netdev_set_mac_address(struct net_device *ndev, void *p) */ static void axienet_set_multicast_list(struct net_device *ndev) { - int i; + int i = 0; u32 reg, af0reg, af1reg; struct axienet_local *lp = netdev_priv(ndev); @@ -450,7 +450,10 @@ static void axienet_set_multicast_list(struct net_device *ndev) } else if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; - i = 0; + reg = axienet_ior(lp, XAE_FMI_OFFSET); + reg &= ~XAE_FMI_PM_MASK; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + netdev_for_each_mc_addr(ha, ndev) { if (i >= XAE_MULTICAST_CAM_TABLE_NUM) break; @@ -469,6 +472,7 @@ static void axienet_set_multicast_list(struct net_device *ndev) axienet_iow(lp, XAE_FMI_OFFSET, reg); axienet_iow(lp, XAE_AF0_OFFSET, af0reg); axienet_iow(lp, XAE_AF1_OFFSET, af1reg); + axienet_iow(lp, XAE_FFE_OFFSET, 1); i++; } } else { @@ -476,18 +480,15 @@ static void axienet_set_multicast_list(struct net_device *ndev) reg &= ~XAE_FMI_PM_MASK; axienet_iow(lp, XAE_FMI_OFFSET, reg); - - for (i = 0; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { - reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; - reg |= i; - - axienet_iow(lp, XAE_FMI_OFFSET, reg); - axienet_iow(lp, XAE_AF0_OFFSET, 0); - axienet_iow(lp, XAE_AF1_OFFSET, 0); - } - dev_info(&ndev->dev, "Promiscuous mode disabled.\n"); } + + for (; i < XAE_MULTICAST_CAM_TABLE_NUM; i++) { + reg = axienet_ior(lp, XAE_FMI_OFFSET) & 0xFFFFFF00; + reg |= i; + axienet_iow(lp, XAE_FMI_OFFSET, reg); + axienet_iow(lp, XAE_FFE_OFFSET, 0); + } } /** @@ -2219,9 +2220,9 @@ static void axienet_dma_err_handler(struct work_struct *work) ~(XAE_OPTION_TXEN | XAE_OPTION_RXEN)); axienet_set_mac_address(ndev, NULL); axienet_set_multicast_list(ndev); - axienet_setoptions(ndev, lp->options); napi_enable(&lp->napi_rx); napi_enable(&lp->napi_tx); + axienet_setoptions(ndev, lp->options); } /** diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index b3ddc9a629d9d95463977b7ae50e52683257c95a..fad5b65644645ded60601d4a15ef606ef19b3a13 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -14,9 +14,7 @@ #include "fjes.h" #include "fjes_trace.h" -#define MAJ 1 -#define MIN 2 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) +#define DRV_VERSION "1.2" #define DRV_NAME "fjes" char fjes_driver_name[] = DRV_NAME; char fjes_driver_version[] = DRV_VERSION; diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 427b91aca50d3a370e4ad00071f0c477fefa5076..0696faf60013e0669ceb4aa39dfbac17ad741540 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -1269,6 +1269,9 @@ static netdev_tx_t gtp_dev_xmit(struct sk_buff *skb, struct net_device *dev) if (skb_cow_head(skb, dev->needed_headroom)) goto tx_err; + if (!pskb_inet_may_pull(skb)) + goto tx_err; + skb_reset_inner_headers(skb); /* PDP context lookups in gtp_build_skb_*() need rcu read-side lock. */ diff --git a/drivers/net/phy/aquantia/aquantia_main.c b/drivers/net/phy/aquantia/aquantia_main.c index d12e353742318ef8f2951947ac8e19c21220d243..e982e9ce44a5961005495482e0d7146c625c2e91 100644 --- a/drivers/net/phy/aquantia/aquantia_main.c +++ b/drivers/net/phy/aquantia/aquantia_main.c @@ -653,13 +653,7 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) unsigned long *possible = phydev->possible_interfaces; unsigned int serdes_mode, rate_adapt; phy_interface_t interface; - int i, val, ret; - - ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, - VEND1_GLOBAL_CFG_10M, val, val != 0, - 1000, 100000, false); - if (ret) - return ret; + int i, val; /* Walk the media-speed configuration registers to determine which * host-side serdes modes may be used by the PHY depending on the @@ -708,6 +702,25 @@ static int aqr107_fill_interface_modes(struct phy_device *phydev) return 0; } +static int aqr113c_fill_interface_modes(struct phy_device *phydev) +{ + int val, ret; + + /* It's been observed on some models that - when coming out of suspend + * - the FW signals that the PHY is ready but the GLOBAL_CFG registers + * continue on returning zeroes for some time. Let's poll the 100M + * register until it returns a real value as both 113c and 115c support + * this mode. + */ + ret = phy_read_mmd_poll_timeout(phydev, MDIO_MMD_VEND1, + VEND1_GLOBAL_CFG_100M, val, val != 0, + 1000, 100000, false); + if (ret) + return ret; + + return aqr107_fill_interface_modes(phydev); +} + static int aqr113c_config_init(struct phy_device *phydev) { int ret; @@ -725,7 +738,7 @@ static int aqr113c_config_init(struct phy_device *phydev) if (ret) return ret; - return aqr107_fill_interface_modes(phydev); + return aqr113c_fill_interface_modes(phydev); } static int aqr107_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index dd519805deee318270758c5955859d570d355977..65b0a3115e14cd99a44b9fefe015c1b6d829d4ac 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1389,6 +1389,8 @@ static int ksz9131_config_init(struct phy_device *phydev) const struct device *dev_walker; int ret; + phydev->mdix_ctrl = ETH_TP_MDI_AUTO; + dev_walker = &phydev->mdio.dev; do { of_node = dev_walker->of_node; @@ -1438,28 +1440,30 @@ static int ksz9131_config_init(struct phy_device *phydev) #define MII_KSZ9131_AUTO_MDIX 0x1C #define MII_KSZ9131_AUTO_MDI_SET BIT(7) #define MII_KSZ9131_AUTO_MDIX_SWAP_OFF BIT(6) +#define MII_KSZ9131_DIG_AXAN_STS 0x14 +#define MII_KSZ9131_DIG_AXAN_STS_LINK_DET BIT(14) +#define MII_KSZ9131_DIG_AXAN_STS_A_SELECT BIT(12) static int ksz9131_mdix_update(struct phy_device *phydev) { int ret; - ret = phy_read(phydev, MII_KSZ9131_AUTO_MDIX); - if (ret < 0) - return ret; - - if (ret & MII_KSZ9131_AUTO_MDIX_SWAP_OFF) { - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix_ctrl = ETH_TP_MDI; - else - phydev->mdix_ctrl = ETH_TP_MDI_X; + if (phydev->mdix_ctrl != ETH_TP_MDI_AUTO) { + phydev->mdix = phydev->mdix_ctrl; } else { - phydev->mdix_ctrl = ETH_TP_MDI_AUTO; - } + ret = phy_read(phydev, MII_KSZ9131_DIG_AXAN_STS); + if (ret < 0) + return ret; - if (ret & MII_KSZ9131_AUTO_MDI_SET) - phydev->mdix = ETH_TP_MDI; - else - phydev->mdix = ETH_TP_MDI_X; + if (ret & MII_KSZ9131_DIG_AXAN_STS_LINK_DET) { + if (ret & MII_KSZ9131_DIG_AXAN_STS_A_SELECT) + phydev->mdix = ETH_TP_MDI; + else + phydev->mdix = ETH_TP_MDI_X; + } else { + phydev->mdix = ETH_TP_MDI_INVALID; + } + } return 0; } diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index bed839237fb55b506a693bd4af4d636afafbbe15..25e5bfbb6f89b8176b0a38c30562462c18cc3f2a 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -555,7 +555,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, unsigned long rules) { const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); - u16 reg = RTL8211F_LEDCR_MODE; /* Mode B */ + u16 reg = 0; if (index >= RTL8211F_LED_COUNT) return -EINVAL; @@ -575,6 +575,7 @@ static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, } reg <<= RTL8211F_LEDCR_SHIFT * index; + reg |= RTL8211F_LEDCR_MODE; /* Mode B */ return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); } @@ -1465,6 +1466,13 @@ static struct phy_driver realtek_drvs[] = { .handle_interrupt = genphy_handle_interrupt_no_ack, .suspend = genphy_suspend, .resume = genphy_resume, + }, { + PHY_ID_MATCH_EXACT(0x001cc960), + .name = "RTL8366S Gigabit Ethernet", + .suspend = genphy_suspend, + .resume = genphy_resume, + .read_mmd = genphy_read_mmd_unsupported, + .write_mmd = genphy_write_mmd_unsupported, }, }; diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c index 897b979ec03c81593505d24fb4a51e502ddb5e83..3b5fcaf0dd36db1667e77428a4fbe912cdd8ef82 100644 --- a/drivers/net/phy/vitesse.c +++ b/drivers/net/phy/vitesse.c @@ -237,16 +237,6 @@ static int vsc739x_config_init(struct phy_device *phydev) return 0; } -static int vsc73xx_config_aneg(struct phy_device *phydev) -{ - /* The VSC73xx switches does not like to be instructed to - * do autonegotiation in any way, it prefers that you just go - * with the power-on/reset defaults. Writing some registers will - * just make autonegotiation permanently fail. - */ - return 0; -} - /* This adds a skew for both TX and RX clocks, so the skew should only be * applied to "rgmii-id" interfaces. It may not work as expected * on "rgmii-txid", "rgmii-rxid" or "rgmii" interfaces. @@ -444,7 +434,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -453,7 +442,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc738x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -462,7 +450,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { @@ -471,7 +458,6 @@ static struct phy_driver vsc82xx_driver[] = { .phy_id_mask = 0x000ffff0, /* PHY_GBIT_FEATURES */ .config_init = vsc739x_config_init, - .config_aneg = vsc73xx_config_aneg, .read_page = vsc73xx_read_page, .write_page = vsc73xx_write_page, }, { diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c index ec20953e0f825cb7fa99e6c91258d715cf30019d..4f032b16a8a0a6ce33911179e60764136c1b0cef 100644 --- a/drivers/net/pse-pd/pse_core.c +++ b/drivers/net/pse-pd/pse_core.c @@ -401,9 +401,14 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev, rdesc->ops = &pse_pi_ops; rdesc->owner = pcdev->owner; - rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS | - REGULATOR_CHANGE_CURRENT; - rinit_data->constraints.max_uA = MAX_PI_CURRENT; + rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS; + + if (pcdev->ops->pi_set_current_limit) { + rinit_data->constraints.valid_ops_mask |= + REGULATOR_CHANGE_CURRENT; + rinit_data->constraints.max_uA = MAX_PI_CURRENT; + } + rinit_data->supply_regulator = "vpwr"; rconfig.dev = pcdev->dev; diff --git a/drivers/net/pse-pd/tps23881.c b/drivers/net/pse-pd/tps23881.c index 61f6ad9c1934e1dc17447886728492aa1a1f509e..2ea75686a3190730aed82045ef113db3abafdca0 100644 --- a/drivers/net/pse-pd/tps23881.c +++ b/drivers/net/pse-pd/tps23881.c @@ -5,6 +5,7 @@ * Copyright (c) 2023 Bootlin, Kory Maincent */ +#include #include #include #include @@ -29,6 +30,8 @@ #define TPS23881_REG_TPON BIT(0) #define TPS23881_REG_FWREV 0x41 #define TPS23881_REG_DEVID 0x43 +#define TPS23881_REG_DEVID_MASK 0xF0 +#define TPS23881_DEVICE_ID 0x02 #define TPS23881_REG_SRAM_CTRL 0x60 #define TPS23881_REG_SRAM_DATA 0x61 @@ -750,7 +753,7 @@ static int tps23881_i2c_probe(struct i2c_client *client) if (ret < 0) return ret; - if (ret != 0x22) { + if (FIELD_GET(TPS23881_REG_DEVID_MASK, ret) != TPS23881_DEVICE_ID) { dev_err(dev, "Wrong device ID\n"); return -ENXIO; } diff --git a/drivers/net/usb/ipheth.c b/drivers/net/usb/ipheth.c index 687d70cfc5563526d3e51d046bcc7a1bf3f00acc..46afb95ffabe3b0075fdf9782af9803610d43016 100644 --- a/drivers/net/usb/ipheth.c +++ b/drivers/net/usb/ipheth.c @@ -286,10 +286,11 @@ static void ipheth_rcvbulk_callback(struct urb *urb) return; } - if (urb->actual_length <= IPHETH_IP_ALIGN) { - dev->net->stats.rx_length_errors++; - return; - } + /* iPhone may periodically send URBs with no payload + * on the "bulk in" endpoint. It is safe to ignore them. + */ + if (urb->actual_length == 0) + goto rx_submit; /* RX URBs starting with 0x00 0x01 do not encapsulate Ethernet frames, * but rather are control frames. Their purpose is not documented, and @@ -298,7 +299,8 @@ static void ipheth_rcvbulk_callback(struct urb *urb) * URB received from the bulk IN endpoint. */ if (unlikely - (((char *)urb->transfer_buffer)[0] == 0 && + (urb->actual_length == 4 && + ((char *)urb->transfer_buffer)[0] == 0 && ((char *)urb->transfer_buffer)[1] == 1)) goto rx_submit; @@ -306,7 +308,6 @@ static void ipheth_rcvbulk_callback(struct urb *urb) if (retval != 0) { dev_err(&dev->intf->dev, "%s: callback retval: %d\n", __func__, retval); - return; } rx_submit: @@ -354,13 +355,14 @@ static int ipheth_carrier_set(struct ipheth_device *dev) 0x02, /* index */ dev->ctrl_buf, IPHETH_CTRL_BUF_SIZE, IPHETH_CTRL_TIMEOUT); - if (retval < 0) { + if (retval <= 0) { dev_err(&dev->intf->dev, "%s: usb_control_msg: %d\n", __func__, retval); return retval; } - if (dev->ctrl_buf[0] == IPHETH_CARRIER_ON) { + if ((retval == 1 && dev->ctrl_buf[0] == IPHETH_CARRIER_ON) || + (retval >= 2 && dev->ctrl_buf[1] == IPHETH_CARRIER_ON)) { netif_carrier_on(dev->net); if (dev->tx_urb->status != -EINPROGRESS) netif_wake_queue(dev->net); @@ -475,8 +477,8 @@ static int ipheth_close(struct net_device *net) { struct ipheth_device *dev = netdev_priv(net); - cancel_delayed_work_sync(&dev->carrier_work); netif_stop_queue(net); + cancel_delayed_work_sync(&dev->carrier_work); return 0; } diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 386d62769dedb1411150597b5ddc4d951e91b666..4823dbdf54656f08c6c590ee6641a932d43ef0c2 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -201,6 +201,7 @@ static int qmimux_rx_fixup(struct usbnet *dev, struct sk_buff *skb) break; default: /* not ip - do not know what to do */ + kfree_skb(skbn); goto skip; } @@ -1431,6 +1432,7 @@ static const struct usb_device_id products[] = { {QMI_QUIRK_SET_DTR(0x1546, 0x1312, 4)}, /* u-blox LARA-R6 01B */ {QMI_QUIRK_SET_DTR(0x1546, 0x1342, 4)}, /* u-blox LARA-L6 */ {QMI_QUIRK_SET_DTR(0x33f8, 0x0104, 4)}, /* Rolling RW101 RMNET */ + {QMI_FIXED_INTF(0x2dee, 0x4d22, 5)}, /* MeiG Smart SRM825L */ /* 4. Gobi 1000 devices */ {QMI_GOBI1K_DEVICE(0x05c6, 0x9212)}, /* Acer Gobi Modem Device */ diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 0a662e42ed96593cf20d4209af2bc64a74f307df..cb7d2f798fb43622c7e954aeebeab8edede4cb49 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -179,6 +179,7 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) struct usbnet *dev = netdev_priv(netdev); __le16 res; int rc = 0; + int err; if (phy_id) { netdev_dbg(netdev, "Only internal phy supported\n"); @@ -189,11 +190,17 @@ static int sr_mdio_read(struct net_device *netdev, int phy_id, int loc) if (loc == MII_BMSR) { u8 value; - sr_read_reg(dev, SR_NSR, &value); + err = sr_read_reg(dev, SR_NSR, &value); + if (err < 0) + return err; + if (value & NSR_LINKST) rc = 1; } - sr_share_read_word(dev, 1, loc, &res); + err = sr_share_read_word(dev, 1, loc, &res); + if (err < 0) + return err; + if (rc == 1) res = le16_to_cpu(res) | BMSR_LSTATUS; else diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0383a3e136d6a07f57d71dbdd5a8ae09ac792a4a..c6af189480929a8ccea4090d7f343380f29b730c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -2867,8 +2867,8 @@ static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index) if (err < 0) goto err_xdp_reg_mem_model; - virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); netdev_tx_reset_queue(netdev_get_tx_queue(vi->dev, qp_index)); + virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi); virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi); return 0; @@ -3658,6 +3658,9 @@ static int virtnet_send_rx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, rxq2vq(queue), max_usecs, max_packets); if (err) @@ -3675,6 +3678,9 @@ static int virtnet_send_tx_ctrl_coal_vq_cmd(struct virtnet_info *vi, { int err; + if (!virtio_has_feature(vi->vdev, VIRTIO_NET_F_VQ_NOTF_COAL)) + return -EOPNOTSUPP; + err = virtnet_send_ctrl_coal_vq_cmd(vi, txq2vq(queue), max_usecs, max_packets); if (err) @@ -3743,7 +3749,11 @@ static int virtnet_set_ringparam(struct net_device *dev, err = virtnet_send_tx_ctrl_coal_vq_cmd(vi, i, vi->intr_coal_tx.max_usecs, vi->intr_coal_tx.max_packets); - if (err) + + /* Don't break the tx resize action if the vq coalescing is not + * supported. The same is true for rx resize below. + */ + if (err && err != -EOPNOTSUPP) return err; } @@ -3758,7 +3768,7 @@ static int virtnet_set_ringparam(struct net_device *dev, vi->intr_coal_rx.max_usecs, vi->intr_coal_rx.max_packets); mutex_unlock(&vi->rq[i].dim_lock); - if (err) + if (err && err != -EOPNOTSUPP) return err; } } diff --git a/drivers/net/wan/fsl_qmc_hdlc.c b/drivers/net/wan/fsl_qmc_hdlc.c index c5e7ca793c43311911e9f7cf0a1c36215a6a641e..8fcfbde31a1c66db1e036cee5151a9b8e58f3c91 100644 --- a/drivers/net/wan/fsl_qmc_hdlc.c +++ b/drivers/net/wan/fsl_qmc_hdlc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,7 @@ struct qmc_hdlc { struct qmc_chan *qmc_chan; struct net_device *netdev; struct framer *framer; - spinlock_t carrier_lock; /* Protect carrier detection */ + struct mutex carrier_lock; /* Protect carrier detection */ struct notifier_block nb; bool is_crc32; spinlock_t tx_lock; /* Protect tx descriptors */ @@ -60,7 +61,7 @@ static int qmc_hdlc_framer_set_carrier(struct qmc_hdlc *qmc_hdlc) if (!qmc_hdlc->framer) return 0; - guard(spinlock_irqsave)(&qmc_hdlc->carrier_lock); + guard(mutex)(&qmc_hdlc->carrier_lock); ret = framer_get_status(qmc_hdlc->framer, &framer_status); if (ret) { @@ -249,6 +250,7 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl struct qmc_hdlc_desc *desc = context; struct net_device *netdev; struct qmc_hdlc *qmc_hdlc; + size_t crc_size; int ret; netdev = desc->netdev; @@ -267,15 +269,26 @@ static void qmc_hcld_recv_complete(void *context, size_t length, unsigned int fl if (flags & QMC_RX_FLAG_HDLC_CRC) /* CRC error */ netdev->stats.rx_crc_errors++; kfree_skb(desc->skb); - } else { - netdev->stats.rx_packets++; - netdev->stats.rx_bytes += length; + goto re_queue; + } - skb_put(desc->skb, length); - desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); - netif_rx(desc->skb); + /* Discard the CRC */ + crc_size = qmc_hdlc->is_crc32 ? 4 : 2; + if (length < crc_size) { + netdev->stats.rx_length_errors++; + kfree_skb(desc->skb); + goto re_queue; } + length -= crc_size; + + netdev->stats.rx_packets++; + netdev->stats.rx_bytes += length; + + skb_put(desc->skb, length); + desc->skb->protocol = hdlc_type_trans(desc->skb, netdev); + netif_rx(desc->skb); +re_queue: /* Re-queue a transfer using the same descriptor */ ret = qmc_hdlc_recv_queue(qmc_hdlc, desc, desc->dma_size); if (ret) { @@ -706,7 +719,7 @@ static int qmc_hdlc_probe(struct platform_device *pdev) qmc_hdlc->dev = dev; spin_lock_init(&qmc_hdlc->tx_lock); - spin_lock_init(&qmc_hdlc->carrier_lock); + mutex_init(&qmc_hdlc->carrier_lock); qmc_hdlc->qmc_chan = devm_qmc_chan_get_bychild(dev, dev->of_node); if (IS_ERR(qmc_hdlc->qmc_chan)) diff --git a/drivers/net/wireless/ath/ath12k/dp_tx.c b/drivers/net/wireless/ath/ath12k/dp_tx.c index d08c04343e9007f61c8e11a543ee3c8c011962bd..44406e0b4a342ff11f6c30b5cc3fc4bc89e22fa0 100644 --- a/drivers/net/wireless/ath/ath12k/dp_tx.c +++ b/drivers/net/wireless/ath/ath12k/dp_tx.c @@ -162,6 +162,60 @@ static int ath12k_dp_prepare_htt_metadata(struct sk_buff *skb) return 0; } +static void ath12k_dp_tx_move_payload(struct sk_buff *skb, + unsigned long delta, + bool head) +{ + unsigned long len = skb->len; + + if (head) { + skb_push(skb, delta); + memmove(skb->data, skb->data + delta, len); + skb_trim(skb, len); + } else { + skb_put(skb, delta); + memmove(skb->data + delta, skb->data, len); + skb_pull(skb, delta); + } +} + +static int ath12k_dp_tx_align_payload(struct ath12k_base *ab, + struct sk_buff **pskb) +{ + u32 iova_mask = ab->hw_params->iova_mask; + unsigned long offset, delta1, delta2; + struct sk_buff *skb2, *skb = *pskb; + unsigned int headroom = skb_headroom(skb); + int tailroom = skb_tailroom(skb); + int ret = 0; + + offset = (unsigned long)skb->data & iova_mask; + delta1 = offset; + delta2 = iova_mask - offset + 1; + + if (headroom >= delta1) { + ath12k_dp_tx_move_payload(skb, delta1, true); + } else if (tailroom >= delta2) { + ath12k_dp_tx_move_payload(skb, delta2, false); + } else { + skb2 = skb_realloc_headroom(skb, iova_mask); + if (!skb2) { + ret = -ENOMEM; + goto out; + } + + dev_kfree_skb_any(skb); + + offset = (unsigned long)skb2->data & iova_mask; + if (offset) + ath12k_dp_tx_move_payload(skb2, offset, true); + *pskb = skb2; + } + +out: + return ret; +} + int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, struct sk_buff *skb) { @@ -184,6 +238,7 @@ int ath12k_dp_tx(struct ath12k *ar, struct ath12k_vif *arvif, bool tcl_ring_retry; bool msdu_ext_desc = false; bool add_htt_metadata = false; + u32 iova_mask = ab->hw_params->iova_mask; if (test_bit(ATH12K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags)) return -ESHUTDOWN; @@ -279,6 +334,23 @@ tcl_ring_sel: goto fail_remove_tx_buf; } + if (iova_mask && + (unsigned long)skb->data & iova_mask) { + ret = ath12k_dp_tx_align_payload(ab, &skb); + if (ret) { + ath12k_warn(ab, "failed to align TX buffer %d\n", ret); + /* don't bail out, give original buffer + * a chance even unaligned. + */ + goto map; + } + + /* hdr is pointing to a wrong place after alignment, + * so refresh it for later use. + */ + hdr = (void *)skb->data; + } +map: ti.paddr = dma_map_single(ab->dev, skb->data, skb->len, DMA_TO_DEVICE); if (dma_mapping_error(ab->dev, ti.paddr)) { atomic_inc(&ab->soc_stats.tx_err.misc_fail); diff --git a/drivers/net/wireless/ath/ath12k/hw.c b/drivers/net/wireless/ath/ath12k/hw.c index 2e11ea7635740ce289ee7d248e71caf3d6d9c78f..7b0b6a7f4701ab31ebbc5c429f9d1a7de4baaa1a 100644 --- a/drivers/net/wireless/ath/ath12k/hw.c +++ b/drivers/net/wireless/ath/ath12k/hw.c @@ -924,6 +924,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, { .name = "wcn7850 hw2.0", @@ -1000,6 +1002,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = &wcn7850_uuid, .supports_dynamic_smps_6ghz = false, + + .iova_mask = ATH12K_PCIE_MAX_PAYLOAD_SIZE - 1, }, { .name = "qcn9274 hw2.0", @@ -1072,6 +1076,8 @@ static const struct ath12k_hw_params ath12k_hw_params[] = { .acpi_guid = NULL, .supports_dynamic_smps_6ghz = true, + + .iova_mask = 0, }, }; diff --git a/drivers/net/wireless/ath/ath12k/hw.h b/drivers/net/wireless/ath/ath12k/hw.h index e792eb6b249b41db7aebba59ec2f3fbc9dcb152b..b1d302c48326b8eeb8c9bde4dfe2485dfedb1b01 100644 --- a/drivers/net/wireless/ath/ath12k/hw.h +++ b/drivers/net/wireless/ath/ath12k/hw.h @@ -96,6 +96,8 @@ #define ATH12K_M3_FILE "m3.bin" #define ATH12K_REGDB_FILE_NAME "regdb.bin" +#define ATH12K_PCIE_MAX_PAYLOAD_SIZE 128 + enum ath12k_hw_rate_cck { ATH12K_HW_RATE_CCK_LP_11M = 0, ATH12K_HW_RATE_CCK_LP_5_5M, @@ -215,6 +217,8 @@ struct ath12k_hw_params { const guid_t *acpi_guid; bool supports_dynamic_smps_6ghz; + + u32 iova_mask; }; struct ath12k_hw_ops { diff --git a/drivers/net/wireless/ath/ath12k/mac.c b/drivers/net/wireless/ath/ath12k/mac.c index 8106297f0bc1c786fbb10489d4396cb3e93af08e..ce41c8153080cdd4d3e6b13e3398ddee7e7e4292 100644 --- a/drivers/net/wireless/ath/ath12k/mac.c +++ b/drivers/net/wireless/ath/ath12k/mac.c @@ -9193,6 +9193,7 @@ static int ath12k_mac_hw_register(struct ath12k_hw *ah) hw->vif_data_size = sizeof(struct ath12k_vif); hw->sta_data_size = sizeof(struct ath12k_sta); + hw->extra_tx_headroom = ab->hw_params->iova_mask; wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_STA_TX_PWR); diff --git a/drivers/net/wireless/ath/ath12k/pci.c b/drivers/net/wireless/ath/ath12k/pci.c index 876c029f58f62c3d9644dc44c80eabae8b084739..9e0b9e329bda32a733f48b54dda389c553a0d60f 100644 --- a/drivers/net/wireless/ath/ath12k/pci.c +++ b/drivers/net/wireless/ath/ath12k/pci.c @@ -473,7 +473,8 @@ static void __ath12k_pci_ext_irq_disable(struct ath12k_base *ab) { int i; - clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags); + if (!test_and_clear_bit(ATH12K_FLAG_EXT_IRQ_ENABLED, &ab->dev_flags)) + return; for (i = 0; i < ATH12K_EXT_IRQ_GRP_NUM_MAX; i++) { struct ath12k_ext_irq_grp *irq_grp = &ab->ext_irq_grp[i]; diff --git a/drivers/net/wireless/ath/ath12k/wow.c b/drivers/net/wireless/ath/ath12k/wow.c index bead19db2c9a21511e3df4cfb916ba943730af23..9b8684abbe40aea8fc93f479ad0700f9a5871289 100644 --- a/drivers/net/wireless/ath/ath12k/wow.c +++ b/drivers/net/wireless/ath/ath12k/wow.c @@ -361,7 +361,7 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, struct ath12k *ar = arvif->ar; unsigned long wow_mask = 0; int pattern_id = 0; - int ret, i; + int ret, i, j; /* Setup requested WOW features */ switch (arvif->vdev_type) { @@ -431,9 +431,9 @@ static int ath12k_wow_vif_set_wakeups(struct ath12k_vif *arvif, eth_pattern->pattern_len); /* convert bitmask to bytemask */ - for (i = 0; i < eth_pattern->pattern_len; i++) - if (eth_pattern->mask[i / 8] & BIT(i % 8)) - new_pattern.bytemask[i] = 0xff; + for (j = 0; j < eth_pattern->pattern_len; j++) + if (eth_pattern->mask[j / 8] & BIT(j % 8)) + new_pattern.bytemask[j] = 0xff; new_pattern.pattern_len = eth_pattern->pattern_len; new_pattern.pkt_offset = eth_pattern->pkt_offset; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index 1585a5653ee4b9435fb4490281ecfe0b57395aaa..d4cc5fa92341d5be73fa41a9c5bbbdf60bcdf837 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -4320,9 +4320,16 @@ brcmf_pmksa_v3_op(struct brcmf_if *ifp, struct cfg80211_pmksa *pmksa, /* Single PMK operation */ pmk_op->count = cpu_to_le16(1); length += sizeof(struct brcmf_pmksa_v3); - memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); - memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); - pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + if (pmksa->bssid) + memcpy(pmk_op->pmk[0].bssid, pmksa->bssid, ETH_ALEN); + if (pmksa->pmkid) { + memcpy(pmk_op->pmk[0].pmkid, pmksa->pmkid, WLAN_PMKID_LEN); + pmk_op->pmk[0].pmkid_len = WLAN_PMKID_LEN; + } + if (pmksa->ssid && pmksa->ssid_len) { + memcpy(pmk_op->pmk[0].ssid.SSID, pmksa->ssid, pmksa->ssid_len); + pmk_op->pmk[0].ssid.SSID_len = pmksa->ssid_len; + } pmk_op->pmk[0].time_left = cpu_to_le32(alive ? BRCMF_PMKSA_NO_EXPIRY : 0); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index b59de4f80b4b895b8ea92232830e84c684d195ec..27a7e0b5b3d51ed2c433435cfd0a17b0a70ce530 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -639,7 +639,8 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq, int slots_num, bool cmd_queue); -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr); +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len); struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *cmd_meta, u8 **hdr, unsigned int hdr_room); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c index 2e780fb2da426cf44f33e4a34e27d64eb825fa03..b1846abb99b78f4fd69dd5382987408e38cdfa4c 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c @@ -168,6 +168,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; dma_addr_t start_hdr_phys; u16 length, amsdu_pad; u8 *start_hdr; @@ -260,7 +261,8 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, int ret; tb_len = min_t(unsigned int, tso.size, data_left); - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, + tb_len); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) goto out_err; @@ -272,6 +274,7 @@ static int iwl_txq_gen2_build_amsdu(struct iwl_trans *trans, goto out_err; data_left -= tb_len; + data_offset += tb_len; tso_build_data(skb, &tso, tb_len); } } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 22d482ae53d97af8bb70238dbe6fb44382a733e4..9fe050f0ddc16002565efce0bf18aecaf771faa9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -1814,23 +1814,31 @@ out: /** * iwl_pcie_get_sgt_tb_phys - Find TB address in mapped SG list * @sgt: scatter gather table - * @addr: Virtual address + * @offset: Offset into the mapped memory (i.e. SKB payload data) + * @len: Length of the area * - * Find the entry that includes the address for the given address and return - * correct physical address for the TB entry. + * Find the DMA address that corresponds to the SKB payload data at the + * position given by @offset. * * Returns: Address for TB entry */ -dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, void *addr) +dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset, + unsigned int len) { struct scatterlist *sg; + unsigned int sg_offset = 0; int i; + /* + * Search the mapped DMA areas in the SG for the area that contains the + * data at offset with the given length. + */ for_each_sgtable_dma_sg(sgt, sg, i) { - if (addr >= sg_virt(sg) && - (u8 *)addr < (u8 *)sg_virt(sg) + sg_dma_len(sg)) - return sg_dma_address(sg) + - ((unsigned long)addr - (unsigned long)sg_virt(sg)); + if (offset >= sg_offset && + offset + len <= sg_offset + sg_dma_len(sg)) + return sg_dma_address(sg) + offset - sg_offset; + + sg_offset += sg_dma_len(sg); } WARN_ON_ONCE(1); @@ -1875,7 +1883,9 @@ struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb, sg_init_table(sgt->sgl, skb_shinfo(skb)->nr_frags + 1); - sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, 0, skb->len); + /* Only map the data, not the header (it is copied to the TSO page) */ + sgt->orig_nents = skb_to_sgvec(skb, sgt->sgl, skb_headlen(skb), + skb->data_len); if (WARN_ON_ONCE(sgt->orig_nents <= 0)) return NULL; @@ -1900,6 +1910,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; + unsigned int data_offset = 0; u16 length, iv_len, amsdu_pad; dma_addr_t start_hdr_phys; u8 *start_hdr, *pos_hdr; @@ -2000,7 +2011,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, data_left); dma_addr_t tb_phys; - tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, tso.data); + tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, size); /* Not a real mapping error, use direct comparison */ if (unlikely(tb_phys == DMA_MAPPING_ERROR)) return -EINVAL; @@ -2011,6 +2022,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, tb_phys, size); data_left -= size; + data_offset += size; tso_build_data(skb, &tso, size); } } diff --git a/drivers/net/wireless/mediatek/mt76/mt7921/main.c b/drivers/net/wireless/mediatek/mt76/mt7921/main.c index 2e6268cb06c002a129fbd5ddf552a6267b34de06..23b228804289be7e962704b7754de5d3155cf732 100644 --- a/drivers/net/wireless/mediatek/mt76/mt7921/main.c +++ b/drivers/net/wireless/mediatek/mt76/mt7921/main.c @@ -303,6 +303,7 @@ mt7921_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) mvif->bss_conf.mt76.omac_idx = mvif->bss_conf.mt76.idx; mvif->phy = phy; + mvif->bss_conf.vif = mvif; mvif->bss_conf.mt76.band_idx = 0; mvif->bss_conf.mt76.wmm_idx = mvif->bss_conf.mt76.idx % MT76_CONNAC_MAX_WMM_SETS; @@ -1182,7 +1183,7 @@ static void mt7921_ipv6_addr_change(struct ieee80211_hw *hw, struct inet6_dev *idev) { struct mt792x_vif *mvif = (struct mt792x_vif *)vif->drv_priv; - struct mt792x_dev *dev = mvif->phy->dev; + struct mt792x_dev *dev = mt792x_hw_dev(hw); struct inet6_ifaddr *ifa; struct in6_addr ns_addrs[IEEE80211_BSS_ARP_ADDR_LIST_LEN]; struct sk_buff *skb; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c index 700c6e2bcad19468e61075fa9b836348de0f637e..ff458fb8514daee51bee4cda46d024d3144355e4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192du/hw.c @@ -181,11 +181,11 @@ static void _rtl92du_init_queue_reserved_page(struct ieee80211_hw *hw, struct rtl_hal *rtlhal = rtl_hal(rtlpriv); u32 txqpagenum, txqpageunit; u32 txqremainingpage; + u32 value32 = 0; u32 numhq = 0; u32 numlq = 0; u32 numnq = 0; u32 numpubq; - u32 value32; if (rtlhal->macphymode != SINGLEMAC_SINGLEPHY) { numpubq = NORMAL_PAGE_NUM_PUBQ_92D_DUAL_MAC; diff --git a/drivers/nfc/pn544/i2c.c b/drivers/nfc/pn544/i2c.c index 9fe664960b38032d74fb6544b998ab1c92e198fc..e2a6575b9ff7f8d55aaec1f683b93e703844a2dd 100644 --- a/drivers/nfc/pn544/i2c.c +++ b/drivers/nfc/pn544/i2c.c @@ -126,8 +126,6 @@ struct pn544_i2c_fw_secure_blob { #define PN544_FW_CMD_RESULT_COMMAND_REJECTED 0xE0 #define PN544_FW_CMD_RESULT_CHUNK_ERROR 0xE6 -#define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) - #define PN544_FW_WRITE_BUFFER_MAX_LEN 0x9f7 #define PN544_FW_I2C_MAX_PAYLOAD PN544_HCI_I2C_LLC_MAX_SIZE #define PN544_FW_I2C_WRITE_FRAME_HEADER_LEN 8 diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 1ae8b2351654e7a7315c67025d9c453631a621f7..210fb77f51ba026d943bbb95eefce8abecdd344c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -498,7 +498,7 @@ static int pmem_attach_disk(struct device *dev, } if (fua) lim.features |= BLK_FEAT_FUA; - if (is_nd_pfn(dev)) + if (is_nd_pfn(dev) || pmem_should_map_pages(dev)) lim.features |= BLK_FEAT_DAX; if (!devm_request_mem_region(dev, res->start, resource_size(res), diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 053d5b4909cda49f433babe243309a173cd2cb18..0dc8bcc664f2237c71f1c51ee4f1c1de12c063c9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -36,6 +36,7 @@ struct nvme_ns_info { struct nvme_ns_ids ids; u32 nsid; __le32 anagrpid; + u8 pi_offset; bool is_shared; bool is_readonly; bool is_ready; @@ -1757,8 +1758,8 @@ int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, - struct queue_limits *lim) +static bool nvme_init_integrity(struct nvme_ns_head *head, + struct queue_limits *lim, struct nvme_ns_info *info) { struct blk_integrity *bi = &lim->integrity; @@ -1816,7 +1817,7 @@ static bool nvme_init_integrity(struct gendisk *disk, struct nvme_ns_head *head, } bi->tuple_size = head->ms; - bi->pi_offset = head->pi_offset; + bi->pi_offset = info->pi_offset; return true; } @@ -1902,12 +1903,11 @@ static void nvme_configure_pi_elbas(struct nvme_ns_head *head, static void nvme_configure_metadata(struct nvme_ctrl *ctrl, struct nvme_ns_head *head, struct nvme_id_ns *id, - struct nvme_id_ns_nvm *nvm) + struct nvme_id_ns_nvm *nvm, struct nvme_ns_info *info) { head->features &= ~(NVME_NS_METADATA_SUPPORTED | NVME_NS_EXT_LBAS); head->pi_type = 0; head->pi_size = 0; - head->pi_offset = 0; head->ms = le16_to_cpu(id->lbaf[nvme_lbaf_index(id->flbas)].ms); if (!head->ms || !(ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) return; @@ -1922,7 +1922,7 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, if (head->pi_size && head->ms >= head->pi_size) head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; if (!(id->dps & NVME_NS_DPS_PI_FIRST)) - head->pi_offset = head->ms - head->pi_size; + info->pi_offset = head->ms - head->pi_size; if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -2156,7 +2156,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, lim = queue_limits_start_update(ns->disk->queue); nvme_set_ctrl_limits(ns->ctrl, &lim); - nvme_configure_metadata(ns->ctrl, ns->head, id, nvm); + nvme_configure_metadata(ns->ctrl, ns->head, id, nvm, info); nvme_set_chunk_sectors(ns, id, &lim); if (!nvme_update_disk_info(ns, id, &lim)) capacity = 0; @@ -2176,7 +2176,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns, * I/O to namespaces with metadata except when the namespace supports * PI, as it can strip/insert in that case. */ - if (!nvme_init_integrity(ns->disk, ns->head, &lim)) + if (!nvme_init_integrity(ns->head, &lim, info)) capacity = 0; ret = queue_limits_commit_update(ns->disk->queue, &lim); @@ -2280,7 +2280,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info) if (unsupported) ns->head->disk->flags |= GENHD_FL_HIDDEN; else - nvme_init_integrity(ns->head->disk, ns->head, &lim); + nvme_init_integrity(ns->head, &lim, info); ret = queue_limits_commit_update(ns->head->disk->queue, &lim); set_capacity_and_notify(ns->head->disk, get_capacity(ns->disk)); @@ -4612,7 +4612,6 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) { nvme_mpath_stop(ctrl); nvme_auth_stop(ctrl); - nvme_stop_keep_alive(ctrl); nvme_stop_failfast_work(ctrl); flush_work(&ctrl->async_event_work); cancel_work_sync(&ctrl->fw_act_work); @@ -4648,6 +4647,7 @@ EXPORT_SYMBOL_GPL(nvme_start_ctrl); void nvme_uninit_ctrl(struct nvme_ctrl *ctrl) { + nvme_stop_keep_alive(ctrl); nvme_hwmon_exit(ctrl); nvme_fault_inject_fini(&ctrl->fault_inject); dev_pm_qos_hide_latency_tolerance(ctrl->device); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index f900e44243aef0f910cf31e2c41b1d844e65d145..da57947130cc7a65ae14b1e9a73bcbf9eeff1066 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -301,7 +301,6 @@ struct nvme_ctrl { struct opal_dev *opal_dev; - char name[12]; u16 cntlid; u16 mtfa; @@ -462,20 +461,19 @@ struct nvme_ns_head { struct srcu_struct srcu; struct nvme_subsystem *subsys; struct nvme_ns_ids ids; + u8 lba_shift; + u16 ms; + u16 pi_size; + u8 pi_type; + u8 guard_type; struct list_head entry; struct kref ref; bool shared; bool passthru_err_log_enabled; - int instance; struct nvme_effects_log *effects; u64 nuse; unsigned ns_id; - int lba_shift; - u16 ms; - u16 pi_size; - u8 pi_type; - u8 pi_offset; - u8 guard_type; + int instance; #ifdef CONFIG_BLK_DEV_ZONED u64 zsze; #endif diff --git a/drivers/of/irq.c b/drivers/of/irq.c index c94203ce65bb323b033a9c635935e88c2f14878b..8fd63100ba8f0819aac78a7917714709ca6aee28 100644 --- a/drivers/of/irq.c +++ b/drivers/of/irq.c @@ -344,7 +344,8 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar struct device_node *p; const __be32 *addr; u32 intsize; - int i, res; + int i, res, addr_len; + __be32 addr_buf[3] = { 0 }; pr_debug("of_irq_parse_one: dev=%pOF, index=%d\n", device, index); @@ -353,13 +354,19 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar return of_irq_parse_oldworld(device, index, out_irq); /* Get the reg property (if any) */ - addr = of_get_property(device, "reg", NULL); + addr = of_get_property(device, "reg", &addr_len); + + /* Prevent out-of-bounds read in case of longer interrupt parent address size */ + if (addr_len > (3 * sizeof(__be32))) + addr_len = 3 * sizeof(__be32); + if (addr) + memcpy(addr_buf, addr, addr_len); /* Try the new-style interrupts-extended first */ res = of_parse_phandle_with_args(device, "interrupts-extended", "#interrupt-cells", index, out_irq); if (!res) - return of_irq_parse_raw(addr, out_irq); + return of_irq_parse_raw(addr_buf, out_irq); /* Look for the interrupt parent. */ p = of_irq_find_parent(device); @@ -389,7 +396,7 @@ int of_irq_parse_one(struct device_node *device, int index, struct of_phandle_ar /* Check if there are any interrupt-map translations to process */ - res = of_irq_parse_raw(addr, out_irq); + res = of_irq_parse_raw(addr_buf, out_irq); out: of_node_put(p); return res; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 061f01f60db4b9978284a77e6503a043972cc712..736ad8baa2a555c02e9315b6011ecc85a5221738 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -485,7 +485,9 @@ int pciehp_set_raw_indicator_status(struct hotplug_slot *hotplug_slot, struct pci_dev *pdev = ctrl_dev(ctrl); pci_config_pm_runtime_get(pdev); - pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC, status), + + /* Attention and Power Indicator Control bits are supported */ + pcie_write_cmd_nowait(ctrl, FIELD_PREP(PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC, status), PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC); pci_config_pm_runtime_put(pdev); return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index e3a49f66982d52fc8a30f3dbf387a2ea95be1deb..ffaaca0978cbc95fbb7566e9700b375cdb4940cc 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4477,12 +4477,6 @@ void pci_intx(struct pci_dev *pdev, int enable) { u16 pci_command, new; - /* Preserve the "hybrid" behavior for backwards compatibility */ - if (pci_is_managed(pdev)) { - WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); - return; - } - pci_read_config_word(pdev, PCI_COMMAND, &pci_command); if (enable) @@ -4490,8 +4484,15 @@ void pci_intx(struct pci_dev *pdev, int enable) else new = pci_command | PCI_COMMAND_INTX_DISABLE; - if (new != pci_command) + if (new != pci_command) { + /* Preserve the "hybrid" behavior for backwards compatibility */ + if (pci_is_managed(pdev)) { + WARN_ON_ONCE(pcim_intx(pdev, enable) != 0); + return; + } + pci_write_config_word(pdev, PCI_COMMAND, new); + } } EXPORT_SYMBOL_GPL(pci_intx); diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 44d3951d009fb09a21fba1d92827598e5a76f6c4..31a17a56eb3bba1b2d70e6443f37197ef3d78554 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -416,7 +416,7 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event) * but not in the user access mode as we want to use the other counters * that support sampling/filtering. */ - if (hwc->flags & PERF_EVENT_FLAG_LEGACY) { + if ((hwc->flags & PERF_EVENT_FLAG_LEGACY) && (event->attr.type == PERF_TYPE_HARDWARE)) { if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) { cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH; cmask = 1; diff --git a/drivers/platform/chrome/cros_ec_proto.c b/drivers/platform/chrome/cros_ec_proto.c index f776fd42244f4cb3489e95456cf3e82f24c7badf..73f75958e15c10f07f7495c2aa43f7f33ec53caa 100644 --- a/drivers/platform/chrome/cros_ec_proto.c +++ b/drivers/platform/chrome/cros_ec_proto.c @@ -813,9 +813,11 @@ int cros_ec_get_next_event(struct cros_ec_device *ec_dev, if (ret == -ENOPROTOOPT) { dev_dbg(ec_dev->dev, "GET_NEXT_EVENT returned invalid version error.\n"); + mutex_lock(&ec_dev->lock); ret = cros_ec_get_host_command_version_mask(ec_dev, EC_CMD_GET_NEXT_EVENT, &ver_mask); + mutex_unlock(&ec_dev->lock); if (ret < 0 || ver_mask == 0) /* * Do not change the MKBP supported version if we can't diff --git a/drivers/platform/cznic/Kconfig b/drivers/platform/cznic/Kconfig index cb0d4d686d8a5123e2f0511a03af343e5854bd74..a111eca8ff57d2b291d1d2388ef32e0d34a402e5 100644 --- a/drivers/platform/cznic/Kconfig +++ b/drivers/platform/cznic/Kconfig @@ -16,35 +16,65 @@ config TURRIS_OMNIA_MCU tristate "Turris Omnia MCU driver" depends on MACH_ARMADA_38X || COMPILE_TEST depends on I2C - depends on OF - depends on WATCHDOG - depends on GPIOLIB - depends on HW_RANDOM - depends on RTC_CLASS - depends on WATCHDOG_CORE - select GPIOLIB_IRQCHIP help Say Y here to add support for the features implemented by the microcontroller on the CZ.NIC's Turris Omnia SOHO router. - The features include: - - board poweroff into true low power mode (with voltage regulators - disabled) and the ability to configure wake up from this mode (via - rtcwake) - - true random number generator (if available on the MCU) - - MCU watchdog - - GPIO pins - - to get front button press events (the front button can be - configured either to generate press events to the CPU or to change - front LEDs panel brightness) - - to enable / disable USB port voltage regulators and to detect - USB overcurrent - - to detect MiniPCIe / mSATA card presence in MiniPCIe port 0 - - to configure resets of various peripherals on board revisions 32+ - - to enable / disable the VHV voltage regulator to the SOC in order - to be able to program SOC's OTP on board revisions 32+ - - to get input from the LED output pins of the WAN ethernet PHY, LAN - switch and MiniPCIe ports + This option only enables the core part of the driver. Specific + features can be enabled by subsequent config options. To compile this driver as a module, choose M here; the module will be called turris-omnia-mcu. +if TURRIS_OMNIA_MCU + +config TURRIS_OMNIA_MCU_GPIO + bool "Turris Omnia MCU GPIOs" + default y + depends on GPIOLIB + depends on OF + select GPIOLIB_IRQCHIP + help + Say Y here to add support for controlling MCU GPIO pins and receiving + MCU interrupts on CZ.NIC's Turris Omnia. + This enables you to + - get front button press events (the front button can be configured + either to generate press events to the CPU or to change front LEDs + panel brightness), + - enable / disable USB port voltage regulators and to detect USB + overcurrent, + - detect MiniPCIe / mSATA card presence in MiniPCIe port 0, + - configure resets of various peripherals on board revisions 32+, + - enable / disable the VHV voltage regulator to the SOC in order to be + able to program SOC's OTP on board revisions 32+, + - get input from the LED output pins of the WAN ethernet PHY, LAN + switch and MiniPCIe ports. + +config TURRIS_OMNIA_MCU_SYSOFF_WAKEUP + bool "Turris Omnia MCU system off and RTC wakeup" + default y + depends on RTC_CLASS + help + Say Y here to add support for CZ.NIC's Turris Omnia board poweroff + into true low power mode (with voltage regulators disabled) and the + ability to configure wake up from this mode (via rtcwake). + +config TURRIS_OMNIA_MCU_WATCHDOG + bool "Turris Omnia MCU watchdog" + default y + depends on WATCHDOG + select WATCHDOG_CORE + help + Say Y here to add support for watchdog provided by CZ.NIC's Turris + Omnia MCU. + +config TURRIS_OMNIA_MCU_TRNG + bool "Turris Omnia MCU true random number generator" + default y + depends on TURRIS_OMNIA_MCU_GPIO + depends on HW_RANDOM + help + Say Y here to add support for the true random number generator + provided by CZ.NIC's Turris Omnia MCU. + +endif # TURRIS_OMNIA_MCU + endif # CZNIC_PLATFORMS diff --git a/drivers/platform/cznic/Makefile b/drivers/platform/cznic/Makefile index eae4c6b341fff776fd11759625a9eda4f28f42bd..ce6d997f34d66e943d44c1993cce63142e00ffbf 100644 --- a/drivers/platform/cznic/Makefile +++ b/drivers/platform/cznic/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_TURRIS_OMNIA_MCU) += turris-omnia-mcu.o turris-omnia-mcu-y := turris-omnia-mcu-base.o -turris-omnia-mcu-y += turris-omnia-mcu-gpio.o -turris-omnia-mcu-y += turris-omnia-mcu-sys-off-wakeup.o -turris-omnia-mcu-y += turris-omnia-mcu-trng.o -turris-omnia-mcu-y += turris-omnia-mcu-watchdog.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_GPIO) += turris-omnia-mcu-gpio.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP) += turris-omnia-mcu-sys-off-wakeup.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_TRNG) += turris-omnia-mcu-trng.o +turris-omnia-mcu-$(CONFIG_TURRIS_OMNIA_MCU_WATCHDOG) += turris-omnia-mcu-watchdog.o diff --git a/drivers/platform/cznic/turris-omnia-mcu-base.c b/drivers/platform/cznic/turris-omnia-mcu-base.c index c68a7a84a9519a65005b23292bb32848acc50342..58f9afae28673753eed1ad5aebb51529ab8849e6 100644 --- a/drivers/platform/cznic/turris-omnia-mcu-base.c +++ b/drivers/platform/cznic/turris-omnia-mcu-base.c @@ -197,8 +197,12 @@ static const struct attribute_group omnia_mcu_base_group = { static const struct attribute_group *omnia_mcu_groups[] = { &omnia_mcu_base_group, +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO &omnia_mcu_gpio_group, +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP &omnia_mcu_poweroff_group, +#endif NULL }; diff --git a/drivers/platform/cznic/turris-omnia-mcu.h b/drivers/platform/cznic/turris-omnia-mcu.h index 2ca56ae13aa908c7fcf3782a336911f0716050cb..fed0d357fea39c0adb8eb1ee440ffaaae984a976 100644 --- a/drivers/platform/cznic/turris-omnia-mcu.h +++ b/drivers/platform/cznic/turris-omnia-mcu.h @@ -33,6 +33,7 @@ struct omnia_mcu { u8 board_first_mac[ETH_ALEN]; u8 board_revision; +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO /* GPIO chip */ struct gpio_chip gc; struct mutex lock; @@ -41,18 +42,25 @@ struct omnia_mcu { struct delayed_work button_release_emul_work; unsigned long last_status; bool button_pressed_emul; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP /* RTC device for configuring wake-up */ struct rtc_device *rtcdev; u32 rtc_alarm; bool front_button_poweron; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG /* MCU watchdog */ struct watchdog_device wdt; +#endif +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG /* true random number generator */ struct hwrng trng; struct completion trng_entropy_ready; +#endif }; int omnia_cmd_write_read(const struct i2c_client *client, @@ -182,13 +190,43 @@ static inline int omnia_cmd_read_u8(const struct i2c_client *client, u8 cmd, return omnia_cmd_read(client, cmd, reply, sizeof(*reply)); } +#ifdef CONFIG_TURRIS_OMNIA_MCU_GPIO extern const u8 omnia_int_to_gpio_idx[32]; extern const struct attribute_group omnia_mcu_gpio_group; -extern const struct attribute_group omnia_mcu_poweroff_group; - int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_gpiochip(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_SYSOFF_WAKEUP +extern const struct attribute_group omnia_mcu_poweroff_group; int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_sys_off_and_wakeup(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_TRNG int omnia_mcu_register_trng(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_trng(struct omnia_mcu *mcu) +{ + return 0; +} +#endif + +#ifdef CONFIG_TURRIS_OMNIA_MCU_WATCHDOG int omnia_mcu_register_watchdog(struct omnia_mcu *mcu); +#else +static inline int omnia_mcu_register_watchdog(struct omnia_mcu *mcu) +{ + return 0; +} +#endif #endif /* __TURRIS_OMNIA_MCU_H */ diff --git a/drivers/platform/surface/aggregator/controller.c b/drivers/platform/surface/aggregator/controller.c index 7fc602e01487d42542338f4c767cac28b34fcd31..7e89f547999b2a00b0d406c005df83b3c2fcc1d9 100644 --- a/drivers/platform/surface/aggregator/controller.c +++ b/drivers/platform/surface/aggregator/controller.c @@ -1354,7 +1354,8 @@ void ssam_controller_destroy(struct ssam_controller *ctrl) if (ctrl->state == SSAM_CONTROLLER_UNINITIALIZED) return; - WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED); + WARN_ON(ctrl->state != SSAM_CONTROLLER_STOPPED && + ctrl->state != SSAM_CONTROLLER_INITIALIZED); /* * Note: New events could still have been received after the previous diff --git a/drivers/platform/surface/surface_aggregator_registry.c b/drivers/platform/surface/surface_aggregator_registry.c index 1c4d74db08c9546f09e4824b904159eddc86319b..a23dff35f8ca23fc7788d133d1a4f9182cffdaf5 100644 --- a/drivers/platform/surface/surface_aggregator_registry.c +++ b/drivers/platform/surface/surface_aggregator_registry.c @@ -265,16 +265,34 @@ static const struct software_node *ssam_node_group_sl5[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, - &ssam_node_tmp_perf_profile, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_hid_main_keyboard, + &ssam_node_hid_main_touchpad, + &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + +/* Devices for Surface Laptop 6. */ +static const struct software_node *ssam_node_group_sl6[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, &ssam_node_hid_main_keyboard, &ssam_node_hid_main_touchpad, &ssam_node_hid_main_iid5, + &ssam_node_hid_sam_sensors, &ssam_node_hid_sam_ucm_ucsi, NULL, }; -/* Devices for Surface Laptop Studio. */ -static const struct software_node *ssam_node_group_sls[] = { +/* Devices for Surface Laptop Studio 1. */ +static const struct software_node *ssam_node_group_sls1[] = { &ssam_node_root, &ssam_node_bat_ac, &ssam_node_bat_main, @@ -289,6 +307,22 @@ static const struct software_node *ssam_node_group_sls[] = { NULL, }; +/* Devices for Surface Laptop Studio 2. */ +static const struct software_node *ssam_node_group_sls2[] = { + &ssam_node_root, + &ssam_node_bat_ac, + &ssam_node_bat_main, + &ssam_node_tmp_perf_profile_with_fan, + &ssam_node_tmp_sensors, + &ssam_node_fan_speed, + &ssam_node_pos_tablet_switch, + &ssam_node_hid_sam_keyboard, + &ssam_node_hid_sam_penstash, + &ssam_node_hid_sam_sensors, + &ssam_node_hid_sam_ucm_ucsi, + NULL, +}; + /* Devices for Surface Laptop Go. */ static const struct software_node *ssam_node_group_slg1[] = { &ssam_node_root, @@ -324,7 +358,7 @@ static const struct software_node *ssam_node_group_sp8[] = { NULL, }; -/* Devices for Surface Pro 9 */ +/* Devices for Surface Pro 9 and 10 */ static const struct software_node *ssam_node_group_sp9[] = { &ssam_node_root, &ssam_node_hub_kip, @@ -365,6 +399,9 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Pro 9 */ { "MSHW0343", (unsigned long)ssam_node_group_sp9 }, + /* Surface Pro 10 */ + { "MSHW0510", (unsigned long)ssam_node_group_sp9 }, + /* Surface Book 2 */ { "MSHW0107", (unsigned long)ssam_node_group_gen5 }, @@ -389,14 +426,23 @@ static const struct acpi_device_id ssam_platform_hub_match[] = { /* Surface Laptop 5 */ { "MSHW0350", (unsigned long)ssam_node_group_sl5 }, + /* Surface Laptop 6 */ + { "MSHW0530", (unsigned long)ssam_node_group_sl6 }, + /* Surface Laptop Go 1 */ { "MSHW0118", (unsigned long)ssam_node_group_slg1 }, /* Surface Laptop Go 2 */ { "MSHW0290", (unsigned long)ssam_node_group_slg1 }, - /* Surface Laptop Studio */ - { "MSHW0123", (unsigned long)ssam_node_group_sls }, + /* Surface Laptop Go 3 */ + { "MSHW0440", (unsigned long)ssam_node_group_slg1 }, + + /* Surface Laptop Studio 1 */ + { "MSHW0123", (unsigned long)ssam_node_group_sls1 }, + + /* Surface Laptop Studio 2 */ + { "MSHW0360", (unsigned long)ssam_node_group_sls2 }, { }, }; diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 665fa9524986595b02826b3645a2f4172b5c048e..ddfccc226751f4dcea9d9b3fee50a16d88c6cdd9 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -477,6 +477,7 @@ config LENOVO_YMC tristate "Lenovo Yoga Tablet Mode Control" depends on ACPI_WMI depends on INPUT + depends on IDEAPAD_LAPTOP select INPUT_SPARSEKMAP help This driver maps the Tablet Mode Control switch to SW_TABLET_MODE input diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index a3d881f6e5d904dd4104ff8582b8a42c03188e4a..c3e51f0a5c33e2160de5f5aabc037c7863a9b30a 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -764,6 +764,7 @@ static int amd_pmc_get_os_hint(struct amd_pmc_dev *dev) case AMD_CPU_ID_CB: case AMD_CPU_ID_PS: case PCI_DEVICE_ID_AMD_1AH_M20H_ROOT: + case PCI_DEVICE_ID_AMD_1AH_M60H_ROOT: return MSG_OS_HINT_RN; } return -EINVAL; @@ -967,6 +968,7 @@ static const struct pci_device_id pmc_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RV) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_SP) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; diff --git a/drivers/platform/x86/amd/pmc/pmc.h b/drivers/platform/x86/amd/pmc/pmc.h index 9e32d3128c3a2bd2e6920f3ad60f1fc3d48dd9bf..f1166d15c856203bdb4bf7a7cd634dd68dcf41bd 100644 --- a/drivers/platform/x86/amd/pmc/pmc.h +++ b/drivers/platform/x86/amd/pmc/pmc.h @@ -67,6 +67,7 @@ void amd_mp2_stb_deinit(struct amd_pmc_dev *dev); #define AMD_CPU_ID_PS 0x14E8 #define AMD_CPU_ID_SP 0x14A4 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PCI_DEVICE_ID_AMD_MP2_STB 0x172c #endif /* PMC_H */ diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index 2d6e2558863c583237dbd6219d515c15b60c293d..8f1f719befa3e7105bb58c8edf77febaca9b545d 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -41,6 +41,7 @@ #define AMD_CPU_ID_RMB 0x14b5 #define AMD_CPU_ID_PS 0x14e8 #define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 +#define PCI_DEVICE_ID_AMD_1AH_M60H_ROOT 0x1122 #define PMF_MSG_DELAY_MIN_US 50 #define RESPONSE_REGISTER_LOOP_MAX 20000 @@ -249,6 +250,7 @@ static const struct pci_device_id pmf_pci_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RMB) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M60H_ROOT) }, { } }; @@ -382,6 +384,7 @@ static const struct acpi_device_id amd_pmf_acpi_ids[] = { {"AMDI0102", 0}, {"AMDI0103", 0}, {"AMDI0105", 0}, + {"AMDI0107", 0}, { } }; MODULE_DEVICE_TABLE(acpi, amd_pmf_acpi_ids); diff --git a/drivers/platform/x86/amd/pmf/pmf-quirks.c b/drivers/platform/x86/amd/pmf/pmf-quirks.c index 0b2eb0ae85febdfcc62fbffddf231889f7842e22..460444cda1b295211eb419b1f9ba65d9900ec984 100644 --- a/drivers/platform/x86/amd/pmf/pmf-quirks.c +++ b/drivers/platform/x86/amd/pmf/pmf-quirks.c @@ -29,6 +29,14 @@ static const struct dmi_system_id fwbug_list[] = { }, .driver_data = &quirk_no_sps_bug, }, + { + .ident = "ROG Ally X", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "RC72LA"), + }, + .driver_data = &quirk_no_sps_bug, + }, {} }; @@ -48,4 +56,3 @@ void amd_pmf_quirks_init(struct amd_pmf_dev *dev) dmi_id->ident); } } - diff --git a/drivers/platform/x86/amd/pmf/spc.c b/drivers/platform/x86/amd/pmf/spc.c index a3dec14c30043ecc9c1d109247452d6d19949976..3c153fb1425e9f6c7fb5039919e40e422f2dbba4 100644 --- a/drivers/platform/x86/amd/pmf/spc.c +++ b/drivers/platform/x86/amd/pmf/spc.c @@ -150,36 +150,26 @@ static int amd_pmf_get_slider_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_ return 0; } -static int amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) +static void amd_pmf_get_sensor_info(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) { struct amd_sfh_info sfh_info; - int ret; + + /* Get the latest information from SFH */ + in->ev_info.user_present = false; /* Get ALS data */ - ret = amd_get_sfh_info(&sfh_info, MT_ALS); - if (!ret) + if (!amd_get_sfh_info(&sfh_info, MT_ALS)) in->ev_info.ambient_light = sfh_info.ambient_light; else - return ret; + dev_dbg(dev->dev, "ALS is not enabled/detected\n"); /* get HPD data */ - ret = amd_get_sfh_info(&sfh_info, MT_HPD); - if (ret) - return ret; - - switch (sfh_info.user_present) { - case SFH_NOT_DETECTED: - in->ev_info.user_present = 0xff; /* assume no sensors connected */ - break; - case SFH_USER_PRESENT: - in->ev_info.user_present = 1; - break; - case SFH_USER_AWAY: - in->ev_info.user_present = 0; - break; + if (!amd_get_sfh_info(&sfh_info, MT_HPD)) { + if (sfh_info.user_present == SFH_USER_PRESENT) + in->ev_info.user_present = true; + } else { + dev_dbg(dev->dev, "HPD is not enabled/detected\n"); } - - return 0; } void amd_pmf_populate_ta_inputs(struct amd_pmf_dev *dev, struct ta_pmf_enact_table *in) diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c index cc735931f97b93be2329e2039d22994456a16584..37636e5a38e3b52f8c46bcb0344693fb1a7c0708 100644 --- a/drivers/platform/x86/asus-wmi.c +++ b/drivers/platform/x86/asus-wmi.c @@ -146,6 +146,20 @@ static const char * const ashs_ids[] = { "ATK4001", "ATK4002", NULL }; static int throttle_thermal_policy_write(struct asus_wmi *); +static const struct dmi_system_id asus_ally_mcu_quirk[] = { + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC71L"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_BOARD_NAME, "RC72L"), + }, + }, + { }, +}; + static bool ashs_present(void) { int i = 0; @@ -4685,7 +4699,7 @@ static int asus_wmi_add(struct platform_device *pdev) asus->dgpu_disable_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_DGPU); asus->kbd_rgb_state_available = asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_TUF_RGB_STATE); asus->ally_mcu_usb_switch = acpi_has_method(NULL, ASUS_USB0_PWR_EC0_CSEE) - && dmi_match(DMI_BOARD_NAME, "RC71L"); + && dmi_check_system(asus_ally_mcu_quirk); if (asus_wmi_dev_is_present(asus, ASUS_WMI_DEVID_MINI_LED_MODE)) asus->mini_led_dev_id = ASUS_WMI_DEVID_MINI_LED_MODE; diff --git a/drivers/platform/x86/dell/Kconfig b/drivers/platform/x86/dell/Kconfig index 85a78ef91182eaa19fb99e40ae366c73c16cad36..309236cecd5a43a0b681e6eb2368396c4cfc6091 100644 --- a/drivers/platform/x86/dell/Kconfig +++ b/drivers/platform/x86/dell/Kconfig @@ -161,6 +161,7 @@ config DELL_SMO8800 config DELL_UART_BACKLIGHT tristate "Dell AIO UART Backlight driver" depends on ACPI + depends on ACPI_VIDEO depends on BACKLIGHT_CLASS_DEVICE depends on SERIAL_DEV_BUS help diff --git a/drivers/platform/x86/dell/dell-uart-backlight.c b/drivers/platform/x86/dell/dell-uart-backlight.c index 87d2a20b4cb3d8b5b1486ae14f8209fe82a5d085..3995f90add4568b65d3edc0ac8b60a074d24bd97 100644 --- a/drivers/platform/x86/dell/dell-uart-backlight.c +++ b/drivers/platform/x86/dell/dell-uart-backlight.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "../serdev_helpers.h" /* The backlight controller must respond within 1 second */ @@ -332,10 +333,17 @@ struct serdev_device_driver dell_uart_bl_serdev_driver = { static int dell_uart_bl_pdev_probe(struct platform_device *pdev) { + enum acpi_backlight_type bl_type; struct serdev_device *serdev; struct device *ctrl_dev; int ret; + bl_type = acpi_video_get_backlight_type(); + if (bl_type != acpi_backlight_dell_uart) { + dev_dbg(&pdev->dev, "Not loading (ACPI backlight type = %d)\n", bl_type); + return -ENODEV; + } + ctrl_dev = get_serdev_controller("DELL0501", NULL, 0, "serial0"); if (IS_ERR(ctrl_dev)) return PTR_ERR(ctrl_dev); diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index 1ace711f7442e2860097d20408adc134ecb6c644..98ec30fce9fdd3c20d56d825e048fd65bfd0b143 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -126,6 +126,7 @@ struct ideapad_rfk_priv { struct ideapad_private { struct acpi_device *adev; + struct mutex vpc_mutex; /* protects the VPC calls */ struct rfkill *rfk[IDEAPAD_RFKILL_DEV_NUM]; struct ideapad_rfk_priv rfk_priv[IDEAPAD_RFKILL_DEV_NUM]; struct platform_device *platform_device; @@ -146,6 +147,7 @@ struct ideapad_private { bool touchpad_ctrl_via_ec : 1; bool ctrl_ps2_aux_port : 1; bool usb_charging : 1; + bool ymc_ec_trigger : 1; } features; struct { bool initialized; @@ -194,6 +196,12 @@ MODULE_PARM_DESC(touchpad_ctrl_via_ec, "Enable registering a 'touchpad' sysfs-attribute which can be used to manually " "tell the EC to enable/disable the touchpad. This may not work on all models."); +static bool ymc_ec_trigger __read_mostly; +module_param(ymc_ec_trigger, bool, 0444); +MODULE_PARM_DESC(ymc_ec_trigger, + "Enable EC triggering work-around to force emitting tablet mode events. " + "If you need this please report this to: platform-driver-x86@vger.kernel.org"); + /* * shared data */ @@ -294,6 +302,8 @@ static int debugfs_status_show(struct seq_file *s, void *data) struct ideapad_private *priv = s->private; unsigned long value; + guard(mutex)(&priv->vpc_mutex); + if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL_MAX, &value)) seq_printf(s, "Backlight max: %lu\n", value); if (!read_ec_data(priv->adev->handle, VPCCMD_R_BL, &value)) @@ -412,7 +422,8 @@ static ssize_t camera_power_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_CAMERA, &result); if (err) return err; @@ -431,7 +442,8 @@ static ssize_t camera_power_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_CAMERA, state); if (err) return err; @@ -484,7 +496,8 @@ static ssize_t fan_mode_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_FAN, &result); if (err) return err; @@ -506,7 +519,8 @@ static ssize_t fan_mode_store(struct device *dev, if (state > 4 || state == 3) return -EINVAL; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_FAN, state); if (err) return err; @@ -591,7 +605,8 @@ static ssize_t touchpad_show(struct device *dev, unsigned long result; int err; - err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); + scoped_guard(mutex, &priv->vpc_mutex) + err = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &result); if (err) return err; @@ -612,7 +627,8 @@ static ssize_t touchpad_store(struct device *dev, if (err) return err; - err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); + scoped_guard(mutex, &priv->vpc_mutex) + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_TOUCHPAD, state); if (err) return err; @@ -1005,6 +1021,8 @@ static int ideapad_rfk_set(void *data, bool blocked) struct ideapad_rfk_priv *priv = data; int opcode = ideapad_rfk_data[priv->dev].opcode; + guard(mutex)(&priv->priv->vpc_mutex); + return write_ec_cmd(priv->priv->adev->handle, opcode, !blocked); } @@ -1018,6 +1036,8 @@ static void ideapad_sync_rfk_state(struct ideapad_private *priv) int i; if (priv->features.hw_rfkill_switch) { + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_RF, &hw_blocked)) return; hw_blocked = !hw_blocked; @@ -1191,8 +1211,9 @@ static void ideapad_input_novokey(struct ideapad_private *priv) { unsigned long long_pressed; - if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_NOVO, &long_pressed)) + return; if (long_pressed) ideapad_input_report(priv, 17); @@ -1204,8 +1225,9 @@ static void ideapad_check_special_buttons(struct ideapad_private *priv) { unsigned long bit, value; - if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) - return; + scoped_guard(mutex, &priv->vpc_mutex) + if (read_ec_data(priv->adev->handle, VPCCMD_R_SPECIAL_BUTTONS, &value)) + return; for_each_set_bit (bit, &value, 16) { switch (bit) { @@ -1238,6 +1260,8 @@ static int ideapad_backlight_get_brightness(struct backlight_device *blightdev) unsigned long now; int err; + guard(mutex)(&priv->vpc_mutex); + err = read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); if (err) return err; @@ -1250,6 +1274,8 @@ static int ideapad_backlight_update_status(struct backlight_device *blightdev) struct ideapad_private *priv = bl_get_data(blightdev); int err; + guard(mutex)(&priv->vpc_mutex); + err = write_ec_cmd(priv->adev->handle, VPCCMD_W_BL, blightdev->props.brightness); if (err) @@ -1327,6 +1353,8 @@ static void ideapad_backlight_notify_power(struct ideapad_private *priv) if (!blightdev) return; + guard(mutex)(&priv->vpc_mutex); + if (read_ec_data(priv->adev->handle, VPCCMD_R_BL_POWER, &power)) return; @@ -1339,7 +1367,8 @@ static void ideapad_backlight_notify_brightness(struct ideapad_private *priv) /* if we control brightness via acpi video driver */ if (!priv->blightdev) - read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); + scoped_guard(mutex, &priv->vpc_mutex) + read_ec_data(priv->adev->handle, VPCCMD_R_BL, &now); else backlight_force_update(priv->blightdev, BACKLIGHT_UPDATE_HOTKEY); } @@ -1564,7 +1593,8 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ int ret; /* Without reading from EC touchpad LED doesn't switch state */ - ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); + scoped_guard(mutex, &priv->vpc_mutex) + ret = read_ec_data(priv->adev->handle, VPCCMD_R_TOUCHPAD, &value); if (ret) return; @@ -1592,16 +1622,92 @@ static void ideapad_sync_touchpad_state(struct ideapad_private *priv, bool send_ priv->r_touchpad_val = value; } +static const struct dmi_system_id ymc_ec_trigger_quirk_dmi_table[] = { + { + /* Lenovo Yoga 7 14ARB7 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), + }, + }, + { + /* Lenovo Yoga 7 14ACN6 */ + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), + }, + }, + { } +}; + +static void ideapad_laptop_trigger_ec(void) +{ + struct ideapad_private *priv; + int ret; + + guard(mutex)(&ideapad_shared_mutex); + + priv = ideapad_shared; + if (!priv) + return; + + if (!priv->features.ymc_ec_trigger) + return; + + scoped_guard(mutex, &priv->vpc_mutex) + ret = write_ec_cmd(priv->adev->handle, VPCCMD_W_YMC, 1); + if (ret) + dev_warn(&priv->platform_device->dev, "Could not write YMC: %d\n", ret); +} + +static int ideapad_laptop_nb_notify(struct notifier_block *nb, + unsigned long action, void *data) +{ + switch (action) { + case IDEAPAD_LAPTOP_YMC_EVENT: + ideapad_laptop_trigger_ec(); + break; + } + + return 0; +} + +static struct notifier_block ideapad_laptop_notifier = { + .notifier_call = ideapad_laptop_nb_notify, +}; + +static BLOCKING_NOTIFIER_HEAD(ideapad_laptop_chain_head); + +int ideapad_laptop_register_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_register_notifier, IDEAPAD_LAPTOP); + +int ideapad_laptop_unregister_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&ideapad_laptop_chain_head, nb); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_unregister_notifier, IDEAPAD_LAPTOP); + +void ideapad_laptop_call_notifier(unsigned long action, void *data) +{ + blocking_notifier_call_chain(&ideapad_laptop_chain_head, action, data); +} +EXPORT_SYMBOL_NS_GPL(ideapad_laptop_call_notifier, IDEAPAD_LAPTOP); + static void ideapad_acpi_notify(acpi_handle handle, u32 event, void *data) { struct ideapad_private *priv = data; unsigned long vpc1, vpc2, bit; - if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) - return; + scoped_guard(mutex, &priv->vpc_mutex) { + if (read_ec_data(handle, VPCCMD_R_VPC1, &vpc1)) + return; - if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) - return; + if (read_ec_data(handle, VPCCMD_R_VPC2, &vpc2)) + return; + } vpc1 = (vpc2 << 8) | vpc1; @@ -1728,6 +1834,8 @@ static void ideapad_check_features(struct ideapad_private *priv) priv->features.ctrl_ps2_aux_port = ctrl_ps2_aux_port || dmi_check_system(ctrl_ps2_aux_port_list); priv->features.touchpad_ctrl_via_ec = touchpad_ctrl_via_ec; + priv->features.ymc_ec_trigger = + ymc_ec_trigger || dmi_check_system(ymc_ec_trigger_quirk_dmi_table); if (!read_ec_data(handle, VPCCMD_R_FAN, &val)) priv->features.fan_mode = true; @@ -1906,6 +2014,10 @@ static int ideapad_acpi_add(struct platform_device *pdev) priv->adev = adev; priv->platform_device = pdev; + err = devm_mutex_init(&pdev->dev, &priv->vpc_mutex); + if (err) + return err; + ideapad_check_features(priv); err = ideapad_sysfs_init(priv); @@ -1974,6 +2086,8 @@ static int ideapad_acpi_add(struct platform_device *pdev) if (err) goto shared_init_failed; + ideapad_laptop_register_notifier(&ideapad_laptop_notifier); + return 0; shared_init_failed: @@ -2006,6 +2120,8 @@ static void ideapad_acpi_remove(struct platform_device *pdev) struct ideapad_private *priv = dev_get_drvdata(&pdev->dev); int i; + ideapad_laptop_unregister_notifier(&ideapad_laptop_notifier); + ideapad_shared_exit(priv); acpi_remove_notify_handler(priv->adev->handle, diff --git a/drivers/platform/x86/ideapad-laptop.h b/drivers/platform/x86/ideapad-laptop.h index 4498a96de59769e7ed9db5e021b3ed856a121a62..948cc61800a950fff5298af64d69c2352b132b3a 100644 --- a/drivers/platform/x86/ideapad-laptop.h +++ b/drivers/platform/x86/ideapad-laptop.h @@ -12,6 +12,15 @@ #include #include #include +#include + +enum ideapad_laptop_notifier_actions { + IDEAPAD_LAPTOP_YMC_EVENT, +}; + +int ideapad_laptop_register_notifier(struct notifier_block *nb); +int ideapad_laptop_unregister_notifier(struct notifier_block *nb); +void ideapad_laptop_call_notifier(unsigned long action, void *data); enum { VPCCMD_R_VPC1 = 0x10, diff --git a/drivers/platform/x86/intel/ifs/runtest.c b/drivers/platform/x86/intel/ifs/runtest.c index 282e4bfe30da326c47156219a5b74790047ab4e3..be3d51ed0e4740eca7c3885b97d591587e747785 100644 --- a/drivers/platform/x86/intel/ifs/runtest.c +++ b/drivers/platform/x86/intel/ifs/runtest.c @@ -221,8 +221,8 @@ static int doscan(void *data) */ static void ifs_test_core(int cpu, struct device *dev) { + union ifs_status status = {}; union ifs_scan activate; - union ifs_status status; unsigned long timeout; struct ifs_data *ifsd; int to_start, to_stop; diff --git a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c index 7fa360073f6ef48c2d6607c7f75c022ce9dbdf4e..4045823071091a1af76d3dfc40481dc1b80e4788 100644 --- a/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c +++ b/drivers/platform/x86/intel/speed_select_if/isst_tpmi_core.c @@ -1549,8 +1549,7 @@ int tpmi_sst_dev_add(struct auxiliary_device *auxdev) goto unlock_free; } - ret = sst_main(auxdev, &pd_info[i]); - if (ret) { + if (sst_main(auxdev, &pd_info[i])) { /* * This entry is not valid, hardware can partially * populate dies. In this case MMIO will have 0xFFs. diff --git a/drivers/platform/x86/intel/vbtn.c b/drivers/platform/x86/intel/vbtn.c index 9b7ce03ba085c245ce04c4eb8d2eaafa969b2970..a353e830b65fd1c3f2082aa2950a608895592873 100644 --- a/drivers/platform/x86/intel/vbtn.c +++ b/drivers/platform/x86/intel/vbtn.c @@ -7,11 +7,13 @@ */ #include +#include #include #include #include #include #include +#include #include #include #include "../dual_accel_detect.h" @@ -66,6 +68,7 @@ static const struct key_entry intel_vbtn_switchmap[] = { }; struct intel_vbtn_priv { + struct mutex mutex; /* Avoid notify_handler() racing with itself */ struct input_dev *buttons_dev; struct input_dev *switches_dev; bool dual_accel; @@ -155,6 +158,8 @@ static void notify_handler(acpi_handle handle, u32 event, void *context) bool autorelease; int ret; + guard(mutex)(&priv->mutex); + if ((ke = sparse_keymap_entry_from_scancode(priv->buttons_dev, event))) { if (!priv->has_buttons) { dev_warn(&device->dev, "Warning: received 0x%02x button event on a device without buttons, please report this.\n", @@ -290,6 +295,10 @@ static int intel_vbtn_probe(struct platform_device *device) return -ENOMEM; dev_set_drvdata(&device->dev, priv); + err = devm_mutex_init(&device->dev, &priv->mutex); + if (err) + return err; + priv->dual_accel = dual_accel; priv->has_buttons = has_buttons; priv->has_switches = has_switches; diff --git a/drivers/platform/x86/lenovo-ymc.c b/drivers/platform/x86/lenovo-ymc.c index e1fbc35504d4985e0949d051ce1231584a544c42..e0bbd6a14a89cb0b0b968e0003299360e66f311c 100644 --- a/drivers/platform/x86/lenovo-ymc.c +++ b/drivers/platform/x86/lenovo-ymc.c @@ -20,32 +20,10 @@ #define LENOVO_YMC_QUERY_INSTANCE 0 #define LENOVO_YMC_QUERY_METHOD 0x01 -static bool ec_trigger __read_mostly; -module_param(ec_trigger, bool, 0444); -MODULE_PARM_DESC(ec_trigger, "Enable EC triggering work-around to force emitting tablet mode events"); - static bool force; module_param(force, bool, 0444); MODULE_PARM_DESC(force, "Force loading on boards without a convertible DMI chassis-type"); -static const struct dmi_system_id ec_trigger_quirk_dmi_table[] = { - { - /* Lenovo Yoga 7 14ARB7 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82QF"), - }, - }, - { - /* Lenovo Yoga 7 14ACN6 */ - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"), - DMI_MATCH(DMI_PRODUCT_NAME, "82N7"), - }, - }, - { } -}; - static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { { .matches = { @@ -62,21 +40,8 @@ static const struct dmi_system_id allowed_chasis_types_dmi_table[] = { struct lenovo_ymc_private { struct input_dev *input_dev; - struct acpi_device *ec_acpi_dev; }; -static void lenovo_ymc_trigger_ec(struct wmi_device *wdev, struct lenovo_ymc_private *priv) -{ - int err; - - if (!priv->ec_acpi_dev) - return; - - err = write_ec_cmd(priv->ec_acpi_dev->handle, VPCCMD_W_YMC, 1); - if (err) - dev_warn(&wdev->dev, "Could not write YMC: %d\n", err); -} - static const struct key_entry lenovo_ymc_keymap[] = { /* Laptop */ { KE_SW, 0x01, { .sw = { SW_TABLET_MODE, 0 } } }, @@ -125,11 +90,9 @@ static void lenovo_ymc_notify(struct wmi_device *wdev, union acpi_object *data) free_obj: kfree(obj); - lenovo_ymc_trigger_ec(wdev, priv); + ideapad_laptop_call_notifier(IDEAPAD_LAPTOP_YMC_EVENT, &code); } -static void acpi_dev_put_helper(void *p) { acpi_dev_put(p); } - static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) { struct lenovo_ymc_private *priv; @@ -143,29 +106,10 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) return -ENODEV; } - ec_trigger |= dmi_check_system(ec_trigger_quirk_dmi_table); - priv = devm_kzalloc(&wdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; - if (ec_trigger) { - pr_debug("Lenovo YMC enable EC triggering.\n"); - priv->ec_acpi_dev = acpi_dev_get_first_match_dev("VPC2004", NULL, -1); - - if (!priv->ec_acpi_dev) { - dev_err(&wdev->dev, "Could not find EC ACPI device.\n"); - return -ENODEV; - } - err = devm_add_action_or_reset(&wdev->dev, - acpi_dev_put_helper, priv->ec_acpi_dev); - if (err) { - dev_err(&wdev->dev, - "Could not clean up EC ACPI device: %d\n", err); - return err; - } - } - input_dev = devm_input_allocate_device(&wdev->dev); if (!input_dev) return -ENOMEM; @@ -192,7 +136,6 @@ static int lenovo_ymc_probe(struct wmi_device *wdev, const void *ctx) dev_set_drvdata(&wdev->dev, priv); /* Report the state for the first time on probe */ - lenovo_ymc_trigger_ec(wdev, priv); lenovo_ymc_notify(wdev, NULL); return 0; } @@ -217,3 +160,4 @@ module_wmi_driver(lenovo_ymc_driver); MODULE_AUTHOR("Gergo Koteles "); MODULE_DESCRIPTION("Lenovo Yoga Mode Control driver"); MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(IDEAPAD_LAPTOP); diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index 3e94fdd1ea52e035588847b6578b43209aa93bc5..3197aaa69da73dbde21be0c3da2d0bd8011cdcf3 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -757,7 +757,6 @@ static union acpi_object *__call_snc_method(acpi_handle handle, char *method, return result; } -#define MIN(a, b) (a > b ? b : a) static int sony_nc_buffer_call(acpi_handle handle, char *name, u64 *value, void *buffer, size_t buflen) { diff --git a/drivers/pmdomain/imx/imx93-pd.c b/drivers/pmdomain/imx/imx93-pd.c index 1e94b499c19bcc6f3bccff7340a830b8e3868748..d750a7dc58d212d61bc782d1ccd93c3fa206baa0 100644 --- a/drivers/pmdomain/imx/imx93-pd.c +++ b/drivers/pmdomain/imx/imx93-pd.c @@ -20,6 +20,7 @@ #define FUNC_STAT_PSW_STAT_MASK BIT(0) #define FUNC_STAT_RST_STAT_MASK BIT(2) #define FUNC_STAT_ISO_STAT_MASK BIT(4) +#define FUNC_STAT_SSAR_STAT_MASK BIT(8) struct imx93_power_domain { struct generic_pm_domain genpd; @@ -50,7 +51,7 @@ static int imx93_pd_on(struct generic_pm_domain *genpd) writel(val, addr + MIX_SLICE_SW_CTRL_OFF); ret = readl_poll_timeout(addr + MIX_FUNC_STAT_OFF, val, - !(val & FUNC_STAT_ISO_STAT_MASK), 1, 10000); + !(val & FUNC_STAT_SSAR_STAT_MASK), 1, 10000); if (ret) { dev_err(domain->dev, "pd_on timeout: name: %s, stat: %x\n", genpd->name, val); return ret; @@ -72,7 +73,7 @@ static int imx93_pd_off(struct generic_pm_domain *genpd) writel(val, addr + MIX_SLICE_SW_CTRL_OFF); ret = readl_poll_timeout(addr + MIX_FUNC_STAT_OFF, val, - val & FUNC_STAT_PSW_STAT_MASK, 1, 1000); + val & FUNC_STAT_PSW_STAT_MASK, 1, 10000); if (ret) { dev_err(domain->dev, "pd_off timeout: name: %s, stat: %x\n", genpd->name, val); return ret; diff --git a/drivers/pmdomain/imx/scu-pd.c b/drivers/pmdomain/imx/scu-pd.c index 05841b0bf7f30db175a8fd723d89b1b2b0306afa..01d465d88f60dcbf3d20335d364077996f405b4a 100644 --- a/drivers/pmdomain/imx/scu-pd.c +++ b/drivers/pmdomain/imx/scu-pd.c @@ -223,11 +223,6 @@ static const struct imx_sc_pd_range imx8qxp_scu_pd_ranges[] = { { "lvds1-pwm", IMX_SC_R_LVDS_1_PWM_0, 1, false, 0 }, { "lvds1-lpi2c", IMX_SC_R_LVDS_1_I2C_0, 2, true, 0 }, - { "mipi1", IMX_SC_R_MIPI_1, 1, 0 }, - { "mipi1-pwm0", IMX_SC_R_MIPI_1_PWM_0, 1, 0 }, - { "mipi1-i2c", IMX_SC_R_MIPI_1_I2C_0, 2, 1 }, - { "lvds1", IMX_SC_R_LVDS_1, 1, 0 }, - /* DC SS */ { "dc0", IMX_SC_R_DC_0, 1, false, 0 }, { "dc0-pll", IMX_SC_R_DC_0_PLL_0, 2, true, 0 }, diff --git a/drivers/power/sequencing/pwrseq-qcom-wcn.c b/drivers/power/sequencing/pwrseq-qcom-wcn.c index 42dacfda745e4ac3f20febb7b46fc7c9ab628966..d786cbf1b2cd647afb33033ecc93d7aa11ce9e0c 100644 --- a/drivers/power/sequencing/pwrseq-qcom-wcn.c +++ b/drivers/power/sequencing/pwrseq-qcom-wcn.c @@ -283,7 +283,7 @@ static int pwrseq_qcom_wcn_probe(struct platform_device *pdev) "Failed to get the Bluetooth enable GPIO\n"); ctx->wlan_gpio = devm_gpiod_get_optional(dev, "wlan-enable", - GPIOD_OUT_LOW); + GPIOD_ASIS); if (IS_ERR(ctx->wlan_gpio)) return dev_err_probe(dev, PTR_ERR(ctx->wlan_gpio), "Failed to get the WLAN enable GPIO\n"); diff --git a/drivers/power/supply/axp288_charger.c b/drivers/power/supply/axp288_charger.c index b5903193e2f96d494a6aaccb515977a74b21a4c8..ac05942e4e6ac1f5610a7217857465136a695601 100644 --- a/drivers/power/supply/axp288_charger.c +++ b/drivers/power/supply/axp288_charger.c @@ -178,18 +178,18 @@ static inline int axp288_charger_set_cv(struct axp288_chrg_info *info, int cv) u8 reg_val; int ret; - if (cv <= CV_4100MV) { - reg_val = CHRG_CCCV_CV_4100MV; - cv = CV_4100MV; - } else if (cv <= CV_4150MV) { - reg_val = CHRG_CCCV_CV_4150MV; - cv = CV_4150MV; - } else if (cv <= CV_4200MV) { + if (cv >= CV_4350MV) { + reg_val = CHRG_CCCV_CV_4350MV; + cv = CV_4350MV; + } else if (cv >= CV_4200MV) { reg_val = CHRG_CCCV_CV_4200MV; cv = CV_4200MV; + } else if (cv >= CV_4150MV) { + reg_val = CHRG_CCCV_CV_4150MV; + cv = CV_4150MV; } else { - reg_val = CHRG_CCCV_CV_4350MV; - cv = CV_4350MV; + reg_val = CHRG_CCCV_CV_4100MV; + cv = CV_4100MV; } reg_val = reg_val << CHRG_CCCV_CV_BIT_POS; @@ -337,8 +337,8 @@ static int axp288_charger_usb_set_property(struct power_supply *psy, } break; case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: - scaled_val = min(val->intval, info->max_cv); - scaled_val = DIV_ROUND_CLOSEST(scaled_val, 1000); + scaled_val = DIV_ROUND_CLOSEST(val->intval, 1000); + scaled_val = min(scaled_val, info->max_cv); ret = axp288_charger_set_cv(info, scaled_val); if (ret < 0) { dev_warn(&info->pdev->dev, "set charge voltage failed\n"); diff --git a/drivers/power/supply/qcom_battmgr.c b/drivers/power/supply/qcom_battmgr.c index 46f36dcb185c366aec6652017e1c64f9b0822ae1..49bef4a5ac3f3e805f65d6b505a394d78b4012a0 100644 --- a/drivers/power/supply/qcom_battmgr.c +++ b/drivers/power/supply/qcom_battmgr.c @@ -486,7 +486,7 @@ static int qcom_battmgr_bat_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -683,7 +683,7 @@ static int qcom_battmgr_ac_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); if (ret) @@ -748,7 +748,7 @@ static int qcom_battmgr_usb_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -867,7 +867,7 @@ static int qcom_battmgr_wls_get_property(struct power_supply *psy, int ret; if (!battmgr->service_up) - return -ENODEV; + return -EAGAIN; if (battmgr->variant == QCOM_BATTMGR_SC8280XP) ret = qcom_battmgr_bat_sc8280xp_update(battmgr, psp); @@ -1007,7 +1007,9 @@ static void qcom_battmgr_sc8280xp_callback(struct qcom_battmgr *battmgr, battmgr->error = 0; break; case BATTMGR_BAT_INFO: - if (payload_len != sizeof(resp->info)) { + /* some firmware versions report an extra __le32 at the end of the payload */ + if (payload_len != sizeof(resp->info) && + payload_len != (sizeof(resp->info) + sizeof(__le32))) { dev_warn(battmgr->dev, "invalid payload length for battery information request: %zd\n", payload_len); diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c index 32eafe2c00af5177e0db8a31eab0d8d892e0d86c..7a27b262fb84a73db82b12e97c2fb57162709e14 100644 --- a/drivers/power/supply/rt5033_battery.c +++ b/drivers/power/supply/rt5033_battery.c @@ -159,6 +159,7 @@ static int rt5033_battery_probe(struct i2c_client *client) return -EINVAL; } + i2c_set_clientdata(client, battery); psy_cfg.of_node = client->dev.of_node; psy_cfg.drv_data = battery; diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c index 0a97cfedd7060ace133224fe718d401cb4a7fda7..42a4a996defbe1e4e39c803340b98eb05472bd69 100644 --- a/drivers/s390/block/dasd.c +++ b/drivers/s390/block/dasd.c @@ -1601,9 +1601,15 @@ static int dasd_ese_needs_format(struct dasd_block *block, struct irb *irb) if (!sense) return 0; - return !!(sense[1] & SNS1_NO_REC_FOUND) || - !!(sense[1] & SNS1_FILE_PROTECTED) || - scsw_cstat(&irb->scsw) == SCHN_STAT_INCORR_LEN; + if (sense[1] & SNS1_NO_REC_FOUND) + return 1; + + if ((sense[1] & SNS1_INV_TRACK_FORMAT) && + scsw_is_tm(&irb->scsw) && + !(sense[2] & SNS2_ENV_DATA_PRESENT)) + return 1; + + return 0; } static int dasd_ese_oos_cond(u8 *sense) @@ -1624,7 +1630,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, struct dasd_device *device; unsigned long now; int nrf_suppressed = 0; - int fp_suppressed = 0; + int it_suppressed = 0; struct request *req; u8 *sense = NULL; int expires; @@ -1679,8 +1685,9 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, */ sense = dasd_get_sense(irb); if (sense) { - fp_suppressed = (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); + it_suppressed = (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); nrf_suppressed = (sense[1] & SNS1_NO_REC_FOUND) && test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); @@ -1695,7 +1702,7 @@ void dasd_int_handler(struct ccw_device *cdev, unsigned long intparm, return; } } - if (!(fp_suppressed || nrf_suppressed)) + if (!(it_suppressed || nrf_suppressed)) device->discipline->dump_sense_dbf(device, irb, "int"); if (device->features & DASD_FEATURE_ERPLOG) @@ -2459,14 +2466,17 @@ retry: rc = 0; list_for_each_entry_safe(cqr, n, ccw_queue, blocklist) { /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and error recovery would be - * unnecessary in these cases. Check if the according suppress - * bit is set. + * In some cases certain errors might be expected and + * error recovery would be unnecessary in these cases. + * Check if the according suppress bit is set. */ sense = dasd_get_sense(&cqr->irb); - if (sense && sense[1] & SNS1_FILE_PROTECTED && - test_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags)) + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags)) + continue; + if (sense && (sense[1] & SNS1_NO_REC_FOUND) && + test_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags)) continue; if (scsw_cstat(&cqr->irb.scsw) == 0x40 && test_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags)) diff --git a/drivers/s390/block/dasd_3990_erp.c b/drivers/s390/block/dasd_3990_erp.c index bbbacfc386f28d92ae2ab9e2091d8178328d6d55..d0aa267462c50a556f3eb718df9e29edb8fbb72b 100644 --- a/drivers/s390/block/dasd_3990_erp.c +++ b/drivers/s390/block/dasd_3990_erp.c @@ -1386,14 +1386,8 @@ dasd_3990_erp_file_prot(struct dasd_ccw_req * erp) struct dasd_device *device = erp->startdev; - /* - * In some cases the 'File Protected' error might be expected and - * log messages shouldn't be written then. - * Check if the according suppress bit is set. - */ - if (!test_bit(DASD_CQR_SUPPRESS_FP, &erp->flags)) - dev_err(&device->cdev->dev, - "Accessing the DASD failed because of a hardware error\n"); + dev_err(&device->cdev->dev, + "Accessing the DASD failed because of a hardware error\n"); return dasd_3990_erp_cleanup(erp, DASD_CQR_FAILED); diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 9388b5c383cab8e48e4d300fb5e41d0947f0c59d..90b106408992d0c793b02bfed7e5eec4a8d44bf9 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2275,6 +2275,7 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); return cqr; } @@ -2556,7 +2557,6 @@ dasd_eckd_build_check_tcw(struct dasd_device *base, struct format_data_t *fdata, cqr->buildclk = get_tod_clock(); cqr->status = DASD_CQR_FILLED; /* Set flags to suppress output for expected errors */ - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); return cqr; @@ -4130,8 +4130,6 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); } @@ -4633,9 +4631,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track( /* Set flags to suppress output for expected errors */ if (dasd_eckd_is_ese(basedev)) { - set_bit(DASD_CQR_SUPPRESS_FP, &cqr->flags); - set_bit(DASD_CQR_SUPPRESS_IL, &cqr->flags); set_bit(DASD_CQR_SUPPRESS_NRF, &cqr->flags); + set_bit(DASD_CQR_SUPPRESS_IT, &cqr->flags); } return cqr; @@ -5780,36 +5777,32 @@ static void dasd_eckd_dump_sense(struct dasd_device *device, { u8 *sense = dasd_get_sense(irb); - if (scsw_is_tm(&irb->scsw)) { - /* - * In some cases the 'File Protected' or 'Incorrect Length' - * error might be expected and log messages shouldn't be written - * then. Check if the according suppress bit is set. - */ - if (sense && (sense[1] & SNS1_FILE_PROTECTED) && - test_bit(DASD_CQR_SUPPRESS_FP, &req->flags)) - return; - if (scsw_cstat(&irb->scsw) == 0x40 && - test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) - return; + /* + * In some cases certain errors might be expected and + * log messages shouldn't be written then. + * Check if the according suppress bit is set. + */ + if (sense && (sense[1] & SNS1_INV_TRACK_FORMAT) && + !(sense[2] & SNS2_ENV_DATA_PRESENT) && + test_bit(DASD_CQR_SUPPRESS_IT, &req->flags)) + return; - dasd_eckd_dump_sense_tcw(device, req, irb); - } else { - /* - * In some cases the 'Command Reject' or 'No Record Found' - * error might be expected and log messages shouldn't be - * written then. Check if the according suppress bit is set. - */ - if (sense && sense[0] & SNS0_CMD_REJECT && - test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) - return; + if (sense && sense[0] & SNS0_CMD_REJECT && + test_bit(DASD_CQR_SUPPRESS_CR, &req->flags)) + return; - if (sense && sense[1] & SNS1_NO_REC_FOUND && - test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) - return; + if (sense && sense[1] & SNS1_NO_REC_FOUND && + test_bit(DASD_CQR_SUPPRESS_NRF, &req->flags)) + return; + if (scsw_cstat(&irb->scsw) == 0x40 && + test_bit(DASD_CQR_SUPPRESS_IL, &req->flags)) + return; + + if (scsw_is_tm(&irb->scsw)) + dasd_eckd_dump_sense_tcw(device, req, irb); + else dasd_eckd_dump_sense_ccw(device, req, irb); - } } static int dasd_eckd_reload_device(struct dasd_device *device) diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index 1aa426b1deddc7ba75c2728c5bfab93af6c71d5e..6da47a65af610f3ed1f83c81fc8511642593b358 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -41,7 +41,6 @@ int dasd_gendisk_alloc(struct dasd_block *block) */ .max_segment_size = PAGE_SIZE, .seg_boundary_mask = PAGE_SIZE - 1, - .dma_alignment = PAGE_SIZE - 1, .max_segments = USHRT_MAX, }; struct gendisk *gdp; diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index e5f40536b4254027c3c181a6efb29e318fa72f7a..81cfb5c89681bc9612acac0f333b8e6d7e4896a8 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -196,7 +196,7 @@ struct dasd_ccw_req { * The following flags are used to suppress output of certain errors. */ #define DASD_CQR_SUPPRESS_NRF 4 /* Suppress 'No Record Found' error */ -#define DASD_CQR_SUPPRESS_FP 5 /* Suppress 'File Protected' error*/ +#define DASD_CQR_SUPPRESS_IT 5 /* Suppress 'Invalid Track' error*/ #define DASD_CQR_SUPPRESS_IL 6 /* Suppress 'Incorrect Length' error */ #define DASD_CQR_SUPPRESS_CR 7 /* Suppress 'Command Reject' error */ diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c index b72f672a7720a61885c3540231fe0da56b00ef7b..66b1bdc6328443eba021a60c1013f75a0a1d7120 100644 --- a/drivers/s390/cio/ccwgroup.c +++ b/drivers/s390/cio/ccwgroup.c @@ -550,4 +550,5 @@ void ccwgroup_remove_ccwdev(struct ccw_device *cdev) put_device(&gdev->dev); } EXPORT_SYMBOL(ccwgroup_remove_ccwdev); +MODULE_DESCRIPTION("ccwgroup bus driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 8ad49030a7bf3044e75cf43c51c10af9c71ce7bd..914dde0416753f2b3fc4042f9eda8e9b230cac83 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -488,4 +488,5 @@ static void __exit vfio_ccw_sch_exit(void) module_init(vfio_ccw_sch_init); module_exit(vfio_ccw_sch_exit); +MODULE_DESCRIPTION("VFIO based Subchannel device driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/s390/crypto/ap_bus.c b/drivers/s390/crypto/ap_bus.c index 0998b17ecb37e28ed3be85f55051bd87b16519c4..f9f682f194154db8e9b52d7c90fce7469e4c2853 100644 --- a/drivers/s390/crypto/ap_bus.c +++ b/drivers/s390/crypto/ap_bus.c @@ -971,11 +971,16 @@ int ap_driver_register(struct ap_driver *ap_drv, struct module *owner, char *name) { struct device_driver *drv = &ap_drv->driver; + int rc; drv->bus = &ap_bus_type; drv->owner = owner; drv->name = name; - return driver_register(drv); + rc = driver_register(drv); + + ap_check_bindings_complete(); + + return rc; } EXPORT_SYMBOL(ap_driver_register); diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index d31884f82f2ad5daa4a0657059377824e609c8b4..73085d2f5c4344681aaa5adef25cb4ef82a211b3 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -65,11 +65,7 @@ #include "task.h" #include "probe_roms.h" -#define MAJ 1 -#define MIN 2 -#define BUILD 0 -#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ - __stringify(BUILD) +#define DRV_VERSION "1.2.0" MODULE_VERSION(DRV_VERSION); diff --git a/drivers/scsi/mpi3mr/mpi3mr_app.c b/drivers/scsi/mpi3mr/mpi3mr_app.c index 8b0eded6ef36e519b3dce897917c4bd92f8f261a..01f035f9330e4bcb4e26641737748130278a0a9c 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_app.c +++ b/drivers/scsi/mpi3mr/mpi3mr_app.c @@ -100,7 +100,8 @@ retry_trace: dprint_init(mrioc, "trying to allocate trace diag buffer of size = %dKB\n", trace_size / 1024); - if (mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { + if (get_order(trace_size) > MAX_PAGE_ORDER || + mpi3mr_alloc_trace_buffer(mrioc, trace_size)) { retry = true; trace_size -= trace_dec_size; dprint_init(mrioc, "trace diag buffer allocation failed\n" @@ -118,8 +119,12 @@ retry_fw: diag_buffer->type = MPI3_DIAG_BUFFER_TYPE_FW; diag_buffer->status = MPI3MR_HDB_BUFSTATUS_NOT_ALLOCATED; if ((mrioc->facts.diag_fw_sz < fw_size) && (fw_size >= fw_min_size)) { - diag_buffer->addr = dma_alloc_coherent(&mrioc->pdev->dev, - fw_size, &diag_buffer->dma_addr, GFP_KERNEL); + if (get_order(fw_size) <= MAX_PAGE_ORDER) { + diag_buffer->addr + = dma_alloc_coherent(&mrioc->pdev->dev, fw_size, + &diag_buffer->dma_addr, + GFP_KERNEL); + } if (!retry) dprint_init(mrioc, "%s:trying to allocate firmware diag buffer of size = %dKB\n", diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index 69b14918de59ea02aed9f6ac17d9843423dcc82c..616894571c6abe6758ef3cc648555d852941f058 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -3575,6 +3575,17 @@ static int mpi3mr_prepare_sg_scmd(struct mpi3mr_ioc *mrioc, scmd->sc_data_direction); priv->meta_sg_valid = 1; /* To unmap meta sg DMA */ } else { + /* + * Some firmware versions byte-swap the REPORT ZONES command + * reply from ATA-ZAC devices by directly accessing in the host + * buffer. This does not respect the default command DMA + * direction and causes IOMMU page faults on some architectures + * with an IOMMU enforcing write mappings (e.g. AMD hosts). + * Avoid such issue by making the REPORT ZONES buffer mapping + * bi-directional. + */ + if (scmd->cmnd[0] == ZBC_IN && scmd->cmnd[1] == ZI_REPORT_ZONES) + scmd->sc_data_direction = DMA_BIDIRECTIONAL; sg_scmd = scsi_sglist(scmd); sges_left = scsi_dma_map(scmd); } @@ -5223,6 +5234,7 @@ mpi3mr_probe(struct pci_dev *pdev, const struct pci_device_id *id) spin_lock_init(&mrioc->watchdog_lock); spin_lock_init(&mrioc->chain_buf_lock); spin_lock_init(&mrioc->sas_node_lock); + spin_lock_init(&mrioc->trigger_lock); INIT_LIST_HEAD(&mrioc->fwevt_list); INIT_LIST_HEAD(&mrioc->tgtdev_list); diff --git a/drivers/scsi/mpt3sas/mpt3sas_base.c b/drivers/scsi/mpt3sas/mpt3sas_base.c index b2bcf4a27ddcdec154889a7bb4ef5d255c1aedb2..b785a7e88b498cb3a0e778d7543d91a682633ca0 100644 --- a/drivers/scsi/mpt3sas/mpt3sas_base.c +++ b/drivers/scsi/mpt3sas/mpt3sas_base.c @@ -2671,6 +2671,22 @@ _base_build_zero_len_sge_ieee(struct MPT3SAS_ADAPTER *ioc, void *paddr) _base_add_sg_single_ieee(paddr, sgl_flags, 0, 0, -1); } +static inline int _base_scsi_dma_map(struct scsi_cmnd *cmd) +{ + /* + * Some firmware versions byte-swap the REPORT ZONES command reply from + * ATA-ZAC devices by directly accessing in the host buffer. This does + * not respect the default command DMA direction and causes IOMMU page + * faults on some architectures with an IOMMU enforcing write mappings + * (e.g. AMD hosts). Avoid such issue by making the report zones buffer + * mapping bi-directional. + */ + if (cmd->cmnd[0] == ZBC_IN && cmd->cmnd[1] == ZI_REPORT_ZONES) + cmd->sc_data_direction = DMA_BIDIRECTIONAL; + + return scsi_dma_map(cmd); +} + /** * _base_build_sg_scmd - main sg creation routine * pcie_device is unused here! @@ -2717,7 +2733,7 @@ _base_build_sg_scmd(struct MPT3SAS_ADAPTER *ioc, sgl_flags = sgl_flags << MPI2_SGE_FLAGS_SHIFT; sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; @@ -2861,7 +2877,7 @@ _base_build_sg_scmd_ieee(struct MPT3SAS_ADAPTER *ioc, } sg_scmd = scsi_sglist(scmd); - sges_left = scsi_dma_map(scmd); + sges_left = _base_scsi_dma_map(scmd); if (sges_left < 0) return -ENOMEM; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index adeaa8ab995167674909b496d2c4804d73e872c5..dad3991397cf9a77777107a511b71587fc2ac805 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -2711,8 +2711,6 @@ static int read_capacity_16(struct scsi_disk *sdkp, struct scsi_device *sdp, if (buffer[14] & 0x40) /* LBPRZ */ sdkp->lbprz = 1; - - sd_config_discard(sdkp, lim, SD_LBP_WS16); } sdkp->capacity = lba + 1; @@ -3310,6 +3308,9 @@ static void sd_read_app_tag_own(struct scsi_disk *sdkp, unsigned char *buffer) static unsigned int sd_discard_mode(struct scsi_disk *sdkp) { + if (!sdkp->lbpme) + return SD_LBP_FULL; + if (!sdkp->lbpvpd) { /* LBP VPD page not provided */ if (sdkp->max_unmap_blocks) @@ -3365,8 +3366,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp, sdkp->unmap_alignment = get_unaligned_be32(&vpd->data[32]) & ~(1 << 31); - sd_config_discard(sdkp, lim, sd_discard_mode(sdkp)); - config_atomic: sdkp->max_atomic = get_unaligned_be32(&vpd->data[44]); sdkp->atomic_alignment = get_unaligned_be32(&vpd->data[48]); @@ -3753,9 +3752,10 @@ static int sd_revalidate_disk(struct gendisk *disk) sd_read_block_limits_ext(sdkp); sd_read_block_characteristics(sdkp, &lim); sd_zbc_read_zones(sdkp, &lim, buffer); - sd_read_cpr(sdkp); } + sd_config_discard(sdkp, &lim, sd_discard_mode(sdkp)); + sd_print_capacity(sdkp, old_capacity); sd_read_write_protect_flag(sdkp, buffer); @@ -3808,6 +3808,14 @@ static int sd_revalidate_disk(struct gendisk *disk) if (err) return err; + /* + * Query concurrent positioning ranges after + * queue_limits_commit_update() unlocked q->limits_lock to avoid + * deadlock with q->sysfs_dir_lock and q->sysfs_lock. + */ + if (sdkp->media_present && scsi_device_supports_vpd(sdp)) + sd_read_cpr(sdkp); + /* * For a zoned drive, revalidating the zones can be done only once * the gendisk capacity is set. So if this fails, set back the gendisk @@ -4205,6 +4213,8 @@ static int sd_resume(struct device *dev) { struct scsi_disk *sdkp = dev_get_drvdata(dev); + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); + if (opal_unlock_from_suspend(sdkp->opal_dev)) { sd_printk(KERN_NOTICE, sdkp, "OPAL unlock failed\n"); return -EIO; @@ -4221,13 +4231,12 @@ static int sd_resume_common(struct device *dev, bool runtime) if (!sdkp) /* E.g.: runtime resume at the start of sd_probe() */ return 0; - sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); - if (!sd_do_start_stop(sdkp->device, runtime)) { sdkp->suspended = false; return 0; } + sd_printk(KERN_NOTICE, sdkp, "Starting disk\n"); ret = sd_start_stop_device(sdkp, 1); if (!ret) { sd_resume(dev); diff --git a/drivers/scsi/sr_ioctl.c b/drivers/scsi/sr_ioctl.c index a0d2556a27bba32b43cda153bb6adb471c0eef0c..089653018d32c1d92663c777c8abccf475a662d6 100644 --- a/drivers/scsi/sr_ioctl.c +++ b/drivers/scsi/sr_ioctl.c @@ -431,7 +431,7 @@ int sr_select_speed(struct cdrom_device_info *cdi, unsigned long speed) struct packet_command cgc; /* avoid exceeding the max speed or overflowing integer bounds */ - speed = clamp(0, speed, 0xffff / 177); + speed = clamp(speed, 0, 0xffff / 177); if (speed == 0) speed = 0xffff; /* set to max */ diff --git a/drivers/soc/fsl/qbman/qman.c b/drivers/soc/fsl/qbman/qman.c index 7e9074519ad22d7d6b26a9148f5f65b4e2e79b4f..4dc8aba33d9b70cbdaab6369370da0ccf484a7b1 100644 --- a/drivers/soc/fsl/qbman/qman.c +++ b/drivers/soc/fsl/qbman/qman.c @@ -2546,11 +2546,6 @@ release_lock: } EXPORT_SYMBOL(qman_delete_cgr); -struct cgr_comp { - struct qman_cgr *cgr; - struct completion completion; -}; - static void qman_delete_cgr_smp_call(void *p) { qman_delete_cgr((struct qman_cgr *)p); diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c index 05ebb03d319fcb92fb847f481ddf47e17aac020a..d4607cb89c484008ee0ff521a86dd82e893b3991 100644 --- a/drivers/spi/spi-cadence-quadspi.c +++ b/drivers/spi/spi-cadence-quadspi.c @@ -2000,13 +2000,25 @@ static int cqspi_runtime_resume(struct device *dev) static int cqspi_suspend(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); + int ret; - return spi_controller_suspend(cqspi->host); + ret = spi_controller_suspend(cqspi->host); + if (ret) + return ret; + + return pm_runtime_force_suspend(dev); } static int cqspi_resume(struct device *dev) { struct cqspi_st *cqspi = dev_get_drvdata(dev); + int ret; + + ret = pm_runtime_force_resume(dev); + if (ret) { + dev_err(dev, "pm_runtime_force_resume failed on resume\n"); + return ret; + } return spi_controller_resume(cqspi->host); } diff --git a/drivers/spi/spi-fsl-lpspi.c b/drivers/spi/spi-fsl-lpspi.c index 32baa14dfd83e5f93f81637ac5a4ce9fcbbc5456..350c5d91d869f63ce75332e8295b128e9ed7f72e 100644 --- a/drivers/spi/spi-fsl-lpspi.c +++ b/drivers/spi/spi-fsl-lpspi.c @@ -82,6 +82,10 @@ #define TCR_RXMSK BIT(19) #define TCR_TXMSK BIT(18) +struct fsl_lpspi_devtype_data { + u8 prescale_max; +}; + struct lpspi_config { u8 bpw; u8 chip_select; @@ -119,10 +123,25 @@ struct fsl_lpspi_data { bool usedma; struct completion dma_rx_completion; struct completion dma_tx_completion; + + const struct fsl_lpspi_devtype_data *devtype_data; +}; + +/* + * ERR051608 fixed or not: + * https://www.nxp.com/docs/en/errata/i.MX93_1P87f.pdf + */ +static struct fsl_lpspi_devtype_data imx93_lpspi_devtype_data = { + .prescale_max = 1, +}; + +static struct fsl_lpspi_devtype_data imx7ulp_lpspi_devtype_data = { + .prescale_max = 8, }; static const struct of_device_id fsl_lpspi_dt_ids[] = { - { .compatible = "fsl,imx7ulp-spi", }, + { .compatible = "fsl,imx7ulp-spi", .data = &imx7ulp_lpspi_devtype_data,}, + { .compatible = "fsl,imx93-spi", .data = &imx93_lpspi_devtype_data,}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, fsl_lpspi_dt_ids); @@ -296,10 +315,12 @@ static void fsl_lpspi_set_watermark(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) { struct lpspi_config config = fsl_lpspi->config; - unsigned int perclk_rate, scldiv; + unsigned int perclk_rate, scldiv, div; + u8 prescale_max; u8 prescale; perclk_rate = clk_get_rate(fsl_lpspi->clk_per); + prescale_max = fsl_lpspi->devtype_data->prescale_max; if (!config.speed_hz) { dev_err(fsl_lpspi->dev, @@ -313,8 +334,10 @@ static int fsl_lpspi_set_bitrate(struct fsl_lpspi_data *fsl_lpspi) return -EINVAL; } - for (prescale = 0; prescale < 8; prescale++) { - scldiv = perclk_rate / config.speed_hz / (1 << prescale) - 2; + div = DIV_ROUND_UP(perclk_rate, config.speed_hz); + + for (prescale = 0; prescale < prescale_max; prescale++) { + scldiv = div / (1 << prescale) - 2; if (scldiv < 256) { fsl_lpspi->config.prescale = prescale; break; @@ -820,6 +843,7 @@ static int fsl_lpspi_init_rpm(struct fsl_lpspi_data *fsl_lpspi) static int fsl_lpspi_probe(struct platform_device *pdev) { + const struct fsl_lpspi_devtype_data *devtype_data; struct fsl_lpspi_data *fsl_lpspi; struct spi_controller *controller; struct resource *res; @@ -828,6 +852,10 @@ static int fsl_lpspi_probe(struct platform_device *pdev) u32 temp; bool is_target; + devtype_data = of_device_get_match_data(&pdev->dev); + if (!devtype_data) + return -ENODEV; + is_target = of_property_read_bool((&pdev->dev)->of_node, "spi-slave"); if (is_target) controller = devm_spi_alloc_target(&pdev->dev, @@ -846,6 +874,7 @@ static int fsl_lpspi_probe(struct platform_device *pdev) fsl_lpspi->is_target = is_target; fsl_lpspi->is_only_cs1 = of_property_read_bool((&pdev->dev)->of_node, "fsl,spi-only-use-cs1-sel"); + fsl_lpspi->devtype_data = devtype_data; init_completion(&fsl_lpspi->xfer_done); diff --git a/drivers/spi/spi-hisi-kunpeng.c b/drivers/spi/spi-hisi-kunpeng.c index 77e9738e42f60ec844c74f09dddf5d90e898d87d..16054695bdb04abf03a232f3ffb6ca12ff434eb7 100644 --- a/drivers/spi/spi-hisi-kunpeng.c +++ b/drivers/spi/spi-hisi-kunpeng.c @@ -481,6 +481,9 @@ static int hisi_spi_probe(struct platform_device *pdev) return -EINVAL; } + if (host->max_speed_hz == 0) + return dev_err_probe(dev, -EINVAL, "spi-max-frequency can't be 0\n"); + ret = device_property_read_u16(dev, "num-cs", &host->num_chipselect); if (ret) @@ -495,6 +498,7 @@ static int hisi_spi_probe(struct platform_device *pdev) host->transfer_one = hisi_spi_transfer_one; host->handle_err = hisi_spi_handle_err; host->dev.fwnode = dev->fwnode; + host->min_speed_hz = DIV_ROUND_UP(host->max_speed_hz, CLK_DIV_MAX); hisi_spi_hw_init(hs); diff --git a/drivers/spi/spi-pxa2xx-pci.c b/drivers/spi/spi-pxa2xx-pci.c index 616d032f1a89a2c361daa47532f43a0093c0920a..cc8dcf782399e92d9af49abdf0061ad771fba22f 100644 --- a/drivers/spi/spi-pxa2xx-pci.c +++ b/drivers/spi/spi-pxa2xx-pci.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -297,11 +298,23 @@ static int pxa2xx_spi_pci_probe(struct pci_dev *dev, return ret; ssp->irq = pci_irq_vector(dev, 0); - return pxa2xx_spi_probe(&dev->dev, ssp); + ret = pxa2xx_spi_probe(&dev->dev, ssp, pdata); + if (ret) + return ret; + + pm_runtime_set_autosuspend_delay(&dev->dev, 50); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + + return 0; } static void pxa2xx_spi_pci_remove(struct pci_dev *dev) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pxa2xx_spi_remove(&dev->dev); } diff --git a/drivers/spi/spi-pxa2xx-platform.c b/drivers/spi/spi-pxa2xx-platform.c index 98a8ceb7db6febb102a5f76e1faaa59050576db2..595af9fa4e0f89837672f92785e55ddba8cca45f 100644 --- a/drivers/spi/spi-pxa2xx-platform.c +++ b/drivers/spi/spi-pxa2xx-platform.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -63,7 +64,7 @@ static struct ssp_device *pxa2xx_spi_ssp_request(struct platform_device *pdev) ssp = pxa_ssp_request(pdev->id, pdev->name); if (!ssp) - return ssp; + return NULL; status = devm_add_action_or_reset(&pdev->dev, pxa2xx_spi_ssp_release, ssp); if (status) @@ -142,14 +143,13 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) struct pxa2xx_spi_controller *platform_info; struct device *dev = &pdev->dev; struct ssp_device *ssp; + int ret; platform_info = dev_get_platdata(dev); if (!platform_info) { platform_info = pxa2xx_spi_init_pdata(pdev); if (IS_ERR(platform_info)) return dev_err_probe(dev, PTR_ERR(platform_info), "missing platform data\n"); - - dev->platform_data = platform_info; } ssp = pxa2xx_spi_ssp_request(pdev); @@ -158,12 +158,28 @@ static int pxa2xx_spi_platform_probe(struct platform_device *pdev) if (!ssp) ssp = &platform_info->ssp; - return pxa2xx_spi_probe(dev, ssp); + pm_runtime_set_autosuspend_delay(dev, 50); + pm_runtime_use_autosuspend(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + ret = pxa2xx_spi_probe(dev, ssp, platform_info); + if (ret) + pm_runtime_disable(dev); + + return ret; } static void pxa2xx_spi_platform_remove(struct platform_device *pdev) { - pxa2xx_spi_remove(&pdev->dev); + struct device *dev = &pdev->dev; + + pm_runtime_get_sync(dev); + + pxa2xx_spi_remove(dev); + + pm_runtime_put_noidle(dev); + pm_runtime_disable(dev); } static const struct acpi_device_id pxa2xx_spi_acpi_match[] = { diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c index 16b96eb176cd9db72e176fb3b8bc4bdb60999fff..bf1f34b0ffc8ebdc1fde387334c73178ccab4ffb 100644 --- a/drivers/spi/spi-pxa2xx.c +++ b/drivers/spi/spi-pxa2xx.c @@ -1277,16 +1277,15 @@ static size_t pxa2xx_spi_max_dma_transfer_size(struct spi_device *spi) return MAX_DMA_LEN; } -int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp) +int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, + struct pxa2xx_spi_controller *platform_info) { - struct pxa2xx_spi_controller *platform_info; struct spi_controller *controller; struct driver_data *drv_data; const struct lpss_config *config; int status; u32 tmp; - platform_info = dev_get_platdata(dev); if (platform_info->is_target) controller = devm_spi_alloc_target(dev, sizeof(*drv_data)); else @@ -1450,24 +1449,16 @@ int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp) } } - pm_runtime_set_autosuspend_delay(dev, 50); - pm_runtime_use_autosuspend(dev); - pm_runtime_set_active(dev); - pm_runtime_enable(dev); - /* Register with the SPI framework */ dev_set_drvdata(dev, drv_data); status = spi_register_controller(controller); if (status) { dev_err_probe(dev, status, "problem registering SPI controller\n"); - goto out_error_pm_runtime_enabled; + goto out_error_clock_enabled; } return status; -out_error_pm_runtime_enabled: - pm_runtime_disable(dev); - out_error_clock_enabled: clk_disable_unprepare(ssp->clk); @@ -1484,8 +1475,6 @@ void pxa2xx_spi_remove(struct device *dev) struct driver_data *drv_data = dev_get_drvdata(dev); struct ssp_device *ssp = drv_data->ssp; - pm_runtime_get_sync(dev); - spi_unregister_controller(drv_data->controller); /* Disable the SSP at the peripheral and SOC level */ @@ -1496,9 +1485,6 @@ void pxa2xx_spi_remove(struct device *dev) if (drv_data->controller_info->enable_dma) pxa2xx_spi_dma_release(drv_data); - pm_runtime_put_noidle(dev); - pm_runtime_disable(dev); - /* Release IRQ */ free_irq(ssp->irq, drv_data); } diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index a470d3d634d34d9b62673b278d1ea565aae7e26b..447be036938483ac311de0840685b555e250ba94 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -132,7 +132,8 @@ extern void pxa2xx_spi_dma_stop(struct driver_data *drv_data); extern int pxa2xx_spi_dma_setup(struct driver_data *drv_data); extern void pxa2xx_spi_dma_release(struct driver_data *drv_data); -int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp); +int pxa2xx_spi_probe(struct device *dev, struct ssp_device *ssp, + struct pxa2xx_spi_controller *platform_info); void pxa2xx_spi_remove(struct device *dev); extern const struct dev_pm_ops pxa2xx_spi_pm_ops; diff --git a/drivers/spi/spi-zynqmp-gqspi.c b/drivers/spi/spi-zynqmp-gqspi.c index 99524a3c9f382e580d40d4c8b8ad41df09471ab2..558c466135a51b7e99305d896c942ea749b4d926 100644 --- a/drivers/spi/spi-zynqmp-gqspi.c +++ b/drivers/spi/spi-zynqmp-gqspi.c @@ -1033,6 +1033,18 @@ static int __maybe_unused zynqmp_runtime_resume(struct device *dev) return 0; } +static unsigned long zynqmp_qspi_timeout(struct zynqmp_qspi *xqspi, u8 bits, + unsigned long bytes) +{ + unsigned long timeout; + + /* Assume we are at most 2x slower than the nominal bus speed */ + timeout = mult_frac(bytes, 2 * 8 * MSEC_PER_SEC, + bits * xqspi->speed_hz); + /* And add 100 ms for scheduling delays */ + return msecs_to_jiffies(timeout + 100); +} + /** * zynqmp_qspi_exec_op() - Initiates the QSPI transfer * @mem: The SPI memory @@ -1049,6 +1061,7 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, { struct zynqmp_qspi *xqspi = spi_controller_get_devdata (mem->spi->controller); + unsigned long timeout; int err = 0, i; u32 genfifoentry = 0; u16 opcode = op->cmd.opcode; @@ -1077,8 +1090,10 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, zynqmp_gqspi_write(xqspi, GQSPI_IER_OFST, GQSPI_IER_GENFIFOEMPTY_MASK | GQSPI_IER_TXNOT_FULL_MASK); - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) { + timeout = zynqmp_qspi_timeout(xqspi, op->cmd.buswidth, + op->cmd.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, + timeout)) { err = -ETIMEDOUT; goto return_err; } @@ -1104,8 +1119,10 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, GQSPI_IER_TXEMPTY_MASK | GQSPI_IER_GENFIFOEMPTY_MASK | GQSPI_IER_TXNOT_FULL_MASK); - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) { + timeout = zynqmp_qspi_timeout(xqspi, op->addr.buswidth, + op->addr.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, + timeout)) { err = -ETIMEDOUT; goto return_err; } @@ -1173,8 +1190,9 @@ static int zynqmp_qspi_exec_op(struct spi_mem *mem, GQSPI_IER_RXEMPTY_MASK); } } - if (!wait_for_completion_timeout - (&xqspi->data_completion, msecs_to_jiffies(1000))) + timeout = zynqmp_qspi_timeout(xqspi, op->data.buswidth, + op->data.nbytes); + if (!wait_for_completion_timeout(&xqspi->data_completion, timeout)) err = -ETIMEDOUT; } diff --git a/drivers/spi/spidev.c b/drivers/spi/spidev.c index 05e6d007f9a7f617c52c20fbc5f7dfa16d9838a4..5304728c68c20d3a7e62538585993c013744b3b7 100644 --- a/drivers/spi/spidev.c +++ b/drivers/spi/spidev.c @@ -700,6 +700,7 @@ static const struct class spidev_class = { }; static const struct spi_device_id spidev_spi_ids[] = { + { .name = "bh2228fv" }, { .name = "dh2228fv" }, { .name = "ltc2488" }, { .name = "sx1301" }, diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c index f240fcc5a4e14a61a7b9db39b3091527b7a4d07c..9ba9495fcc4bae401fa054f7da244d35f1c120ca 100644 --- a/drivers/spmi/spmi-pmic-arb.c +++ b/drivers/spmi/spmi-pmic-arb.c @@ -398,7 +398,7 @@ static int pmic_arb_fmt_read_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, u8 sid, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -477,7 +477,7 @@ static int pmic_arb_fmt_write_cmd(struct spmi_pmic_arb_bus *bus, u8 opc, *offset = rc; if (bc >= PMIC_ARB_MAX_TRANS_BYTES) { - dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested", + dev_err(&bus->spmic->dev, "pmic-arb supports 1..%d bytes per trans, but:%zu requested\n", PMIC_ARB_MAX_TRANS_BYTES, len); return -EINVAL; } @@ -1702,7 +1702,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "cnfg"); if (index < 0) { - dev_err(dev, "cnfg reg region missing"); + dev_err(dev, "cnfg reg region missing\n"); return -EINVAL; } @@ -1712,7 +1712,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, index = of_property_match_string(node, "reg-names", "intr"); if (index < 0) { - dev_err(dev, "intr reg region missing"); + dev_err(dev, "intr reg region missing\n"); return -EINVAL; } @@ -1737,8 +1737,7 @@ static int spmi_pmic_arb_bus_init(struct platform_device *pdev, dev_dbg(&pdev->dev, "adding irq domain for bus %d\n", bus_index); - bus->domain = irq_domain_add_tree(dev->of_node, - &pmic_arb_irq_domain_ops, bus); + bus->domain = irq_domain_add_tree(node, &pmic_arb_irq_domain_ops, bus); if (!bus->domain) { dev_err(&pdev->dev, "unable to create irq_domain\n"); return -ENOMEM; diff --git a/drivers/staging/media/atomisp/include/linux/atomisp.h b/drivers/staging/media/atomisp/include/linux/atomisp.h index 16c9da172c0317c1f3f958851a544992c4a977ee..fefbe3cd08f33d42ef81911088faabf343e42a32 100644 --- a/drivers/staging/media/atomisp/include/linux/atomisp.h +++ b/drivers/staging/media/atomisp/include/linux/atomisp.h @@ -20,7 +20,6 @@ #define _ATOM_ISP_H #include -#include /* struct media_device_info.hw_revision */ #define ATOMISP_HW_REVISION_MASK 0x0000ff00 diff --git a/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h b/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h index 7349943bba2b469cae3a5a177711f5b307459c03..160c496784b7fc775f99fbdd52784493c3bfce9d 100644 --- a/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h +++ b/drivers/staging/media/atomisp/pci/hive_isp_css_include/math_support.h @@ -22,11 +22,6 @@ /* force a value to a lower even value */ #define EVEN_FLOOR(x) ((x) & ~1) -/* for preprocessor and array sizing use MIN and MAX - otherwise use min and max */ -#define MAX(a, b) (((a) > (b)) ? (a) : (b)) -#define MIN(a, b) (((a) < (b)) ? (a) : (b)) - #define CEIL_DIV(a, b) (((b) != 0) ? ((a) + (b) - 1) / (b) : 0) #define CEIL_MUL(a, b) (CEIL_DIV(a, b) * (b)) #define CEIL_MUL2(a, b) (((a) + (b) - 1) & ~((b) - 1)) diff --git a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h index 961c612880833a807366bed58d35a68f5a871d2f..aad860e54d3a71b7b9a443c152ad4f02bdac949a 100644 --- a/drivers/staging/media/atomisp/pci/ia_css_stream_public.h +++ b/drivers/staging/media/atomisp/pci/ia_css_stream_public.h @@ -27,12 +27,16 @@ #include "ia_css_prbs.h" #include "ia_css_input_port.h" -/* Input modes, these enumerate all supported input modes. - * Note that not all ISP modes support all input modes. +/* + * Input modes, these enumerate all supported input modes. + * This enum is part of the atomisp firmware ABI and must + * NOT be changed! + * Note that not all ISP modes support all input modes. */ enum ia_css_input_mode { IA_CSS_INPUT_MODE_SENSOR, /** data from sensor */ IA_CSS_INPUT_MODE_FIFO, /** data from input-fifo */ + IA_CSS_INPUT_MODE_TPG, /** data from test-pattern generator */ IA_CSS_INPUT_MODE_PRBS, /** data from pseudo-random bit stream */ IA_CSS_INPUT_MODE_MEMORY, /** data from a frame in memory */ IA_CSS_INPUT_MODE_BUFFERED_SENSOR /** data is sent through mipi buffer */ diff --git a/drivers/staging/media/atomisp/pci/sh_css_internal.h b/drivers/staging/media/atomisp/pci/sh_css_internal.h index a2d972ea3fa084dd2070666219c66aff6155bf36..959e7f549641c10f5243a734d69ea89b2a412c8d 100644 --- a/drivers/staging/media/atomisp/pci/sh_css_internal.h +++ b/drivers/staging/media/atomisp/pci/sh_css_internal.h @@ -344,7 +344,14 @@ struct sh_css_sp_input_formatter_set { #define IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT (3) -/* SP configuration information */ +/* + * SP configuration information + * + * This struct is part of the atomisp firmware ABI and is directly copied + * to ISP DRAM by sh_css_store_sp_group_to_ddr() + * + * Do NOT change this struct's layout or remove seemingly unused fields! + */ struct sh_css_sp_config { u8 no_isp_sync; /* Signal host immediately after start */ u8 enable_raw_pool_locking; /** Enable Raw Buffer Locking for HALv3 Support */ @@ -354,6 +361,10 @@ struct sh_css_sp_config { host (true) or when they are passed to the preview/video pipe (false). */ + /* + * Note the fields below are only used on the ISP2400 not on the ISP2401, + * sh_css_store_sp_group_to_ddr() skip copying these when run on the ISP2401. + */ struct { u8 a_changed; u8 b_changed; @@ -363,11 +374,13 @@ struct sh_css_sp_config { } input_formatter; sync_generator_cfg_t sync_gen; + tpg_cfg_t tpg; prbs_cfg_t prbs; input_system_cfg_t input_circuit; u8 input_circuit_cfg_changed; - u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; - u8 enable_isys_event_queue; + u32 mipi_sizes_for_check[N_CSI_PORTS][IA_CSS_MIPI_SIZE_CHECK_MAX_NOF_ENTRIES_PER_PORT]; + /* These last 2 fields are used on both the ISP2400 and the ISP2401 */ + u8 enable_isys_event_queue; u8 disable_cont_vf; }; diff --git a/drivers/thermal/gov_bang_bang.c b/drivers/thermal/gov_bang_bang.c index 4a2e869b9538c170112c3b09dc05ee31ac70bcc1..daed67d19efb818d9ac738df5d25a4c4fbd5eb53 100644 --- a/drivers/thermal/gov_bang_bang.c +++ b/drivers/thermal/gov_bang_bang.c @@ -13,6 +13,28 @@ #include "thermal_core.h" +static void bang_bang_set_instance_target(struct thermal_instance *instance, + unsigned int target) +{ + if (instance->target != 0 && instance->target != 1 && + instance->target != THERMAL_NO_TARGET) + pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", + instance->target, instance->name); + + /* + * Enable the fan when the trip is crossed on the way up and disable it + * when the trip is crossed on the way down. + */ + instance->target = target; + instance->initialized = true; + + dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + + mutex_lock(&instance->cdev->lock); + __thermal_cdev_update(instance->cdev); + mutex_unlock(&instance->cdev->lock); +} + /** * bang_bang_control - controls devices associated with the given zone * @tz: thermal_zone_device @@ -54,33 +76,60 @@ static void bang_bang_control(struct thermal_zone_device *tz, tz->temperature, trip->hysteresis); list_for_each_entry(instance, &tz->thermal_instances, tz_node) { - if (instance->trip != trip) - continue; + if (instance->trip == trip) + bang_bang_set_instance_target(instance, crossed_up); + } +} + +static void bang_bang_manage(struct thermal_zone_device *tz) +{ + const struct thermal_trip_desc *td; + struct thermal_instance *instance; - if (instance->target != 0 && instance->target != 1 && - instance->target != THERMAL_NO_TARGET) - pr_debug("Unexpected state %ld of thermal instance %s in bang-bang\n", - instance->target, instance->name); + /* If the code below has run already, nothing needs to be done. */ + if (tz->governor_data) + return; - /* - * Enable the fan when the trip is crossed on the way up and - * disable it when the trip is crossed on the way down. - */ - instance->target = crossed_up; + for_each_trip_desc(tz, td) { + const struct thermal_trip *trip = &td->trip; - dev_dbg(&instance->cdev->device, "target=%ld\n", instance->target); + if (tz->temperature >= td->threshold || + trip->temperature == THERMAL_TEMP_INVALID || + trip->type == THERMAL_TRIP_CRITICAL || + trip->type == THERMAL_TRIP_HOT) + continue; - mutex_lock(&instance->cdev->lock); - instance->cdev->updated = false; /* cdev needs update */ - mutex_unlock(&instance->cdev->lock); + /* + * If the initial cooling device state is "on", but the zone + * temperature is not above the trip point, the core will not + * call bang_bang_control() until the zone temperature reaches + * the trip point temperature which may be never. In those + * cases, set the initial state of the cooling device to 0. + */ + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (!instance->initialized && instance->trip == trip) + bang_bang_set_instance_target(instance, 0); + } } - list_for_each_entry(instance, &tz->thermal_instances, tz_node) - thermal_cdev_update(instance->cdev); + tz->governor_data = (void *)true; +} + +static void bang_bang_update_tz(struct thermal_zone_device *tz, + enum thermal_notify_event reason) +{ + /* + * Let bang_bang_manage() know that it needs to walk trips after binding + * a new cdev and after system resume. + */ + if (reason == THERMAL_TZ_BIND_CDEV || reason == THERMAL_TZ_RESUME) + tz->governor_data = NULL; } static struct thermal_governor thermal_gov_bang_bang = { .name = "bang_bang", .trip_crossed = bang_bang_control, + .manage = bang_bang_manage, + .update_tz = bang_bang_update_tz, }; THERMAL_GOVERNOR_DECLARE(thermal_gov_bang_bang); diff --git a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c index 114136893a59af51c61465881c50d70423dea54f..006614921870288f607e9c067dfa70f28b41af28 100644 --- a/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c +++ b/drivers/thermal/intel/int340x_thermal/processor_thermal_device_pci.c @@ -278,20 +278,32 @@ static struct thermal_zone_params tzone_params = { static bool msi_irq; +static void proc_thermal_free_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) +{ + int i; + + for (i = 0; i < MSI_THERMAL_MAX; i++) { + if (proc_thermal_msi_map[i]) + devm_free_irq(&pdev->dev, proc_thermal_msi_map[i], pci_info); + } + + pci_free_irq_vectors(pdev); +} + static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci *pci_info) { - int ret, i, irq; + int ret, i, irq, count; - ret = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); - if (ret < 0) { + count = pci_alloc_irq_vectors(pdev, 1, MSI_THERMAL_MAX, PCI_IRQ_MSI | PCI_IRQ_MSIX); + if (count < 0) { dev_err(&pdev->dev, "Failed to allocate vectors!\n"); - return ret; + return count; } dev_info(&pdev->dev, "msi enabled:%d msix enabled:%d\n", pdev->msi_enabled, pdev->msix_enabled); - for (i = 0; i < MSI_THERMAL_MAX; i++) { + for (i = 0; i < count; i++) { irq = pci_irq_vector(pdev, i); ret = devm_request_threaded_irq(&pdev->dev, irq, proc_thermal_irq_handler, @@ -310,7 +322,7 @@ static int proc_thermal_setup_msi(struct pci_dev *pdev, struct proc_thermal_pci return 0; err_free_msi_vectors: - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); return ret; } @@ -397,7 +409,7 @@ static int proc_thermal_pci_probe(struct pci_dev *pdev, const struct pci_device_ err_free_vectors: if (msi_irq) - pci_free_irq_vectors(pdev); + proc_thermal_free_msi(pdev, pci_info); err_ret_tzone: thermal_zone_device_unregister(pci_info->tzone); err_del_legacy: @@ -419,6 +431,9 @@ static void proc_thermal_pci_remove(struct pci_dev *pdev) proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_THRES_0, 0); proc_thermal_mmio_write(pci_info, PROC_THERMAL_MMIO_INT_ENABLE_0, 0); + if (msi_irq) + proc_thermal_free_msi(pdev, pci_info); + thermal_zone_device_unregister(pci_info->tzone); proc_thermal_mmio_remove(pdev, pci_info->proc_priv); if (!pci_info->no_legacy) diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 95c399f94744183b4d169f4db504f0d7becffb28..e6669aeda1fff0617a7441a60fd1c12210d9df92 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1728,7 +1728,8 @@ static void thermal_zone_device_resume(struct work_struct *work) thermal_debug_tz_resume(tz); thermal_zone_device_init(tz); - __thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED); + thermal_governor_update_tz(tz, THERMAL_TZ_RESUME); + __thermal_zone_device_update(tz, THERMAL_TZ_RESUME); complete(&tz->resume); tz->resuming = false; diff --git a/drivers/thermal/thermal_debugfs.c b/drivers/thermal/thermal_debugfs.c index 7dd67bf4857198a81d91eb9b9a384afd27227bb0..939d3e5f181771e7cfc87870b6b3a79fa1147667 100644 --- a/drivers/thermal/thermal_debugfs.c +++ b/drivers/thermal/thermal_debugfs.c @@ -178,11 +178,11 @@ struct thermal_debugfs { void thermal_debug_init(void) { d_root = debugfs_create_dir("thermal", NULL); - if (!d_root) + if (IS_ERR(d_root)) return; d_cdev = debugfs_create_dir("cooling_devices", d_root); - if (!d_cdev) + if (IS_ERR(d_cdev)) return; d_tz = debugfs_create_dir("thermal_zones", d_root); @@ -202,7 +202,7 @@ static struct thermal_debugfs *thermal_debugfs_add_id(struct dentry *d, int id) snprintf(ids, IDSLENGTH, "%d", id); thermal_dbg->d_top = debugfs_create_dir(ids, d); - if (!thermal_dbg->d_top) { + if (IS_ERR(thermal_dbg->d_top)) { kfree(thermal_dbg); return NULL; } diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index aa34b6e82e268bce2cb964ad5fcea02122f046c1..1f252692815a18c627614382d0f251339bcc852f 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -125,7 +125,7 @@ static int thermal_of_populate_trip(struct device_node *np, static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *ntrips) { struct thermal_trip *tt; - struct device_node *trips, *trip; + struct device_node *trips; int ret, count; trips = of_get_child_by_name(np, "trips"); @@ -150,7 +150,7 @@ static struct thermal_trip *thermal_of_trips_init(struct device_node *np, int *n *ntrips = count; count = 0; - for_each_child_of_node(trips, trip) { + for_each_child_of_node_scoped(trips, trip) { ret = thermal_of_populate_trip(trip, &tt[count++]); if (ret) goto out_kfree; @@ -184,14 +184,14 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int * Search for each thermal zone, a defined sensor * corresponding to the one passed as parameter */ - for_each_available_child_of_node(np, tz) { + for_each_available_child_of_node_scoped(np, child) { int count, i; - count = of_count_phandle_with_args(tz, "thermal-sensors", + count = of_count_phandle_with_args(child, "thermal-sensors", "#thermal-sensor-cells"); if (count <= 0) { - pr_err("%pOFn: missing thermal sensor\n", tz); + pr_err("%pOFn: missing thermal sensor\n", child); tz = ERR_PTR(-EINVAL); goto out; } @@ -200,18 +200,19 @@ static struct device_node *of_thermal_zone_find(struct device_node *sensor, int int ret; - ret = of_parse_phandle_with_args(tz, "thermal-sensors", + ret = of_parse_phandle_with_args(child, "thermal-sensors", "#thermal-sensor-cells", i, &sensor_specs); if (ret < 0) { - pr_err("%pOFn: Failed to read thermal-sensors cells: %d\n", tz, ret); + pr_err("%pOFn: Failed to read thermal-sensors cells: %d\n", child, ret); tz = ERR_PTR(ret); goto out; } if ((sensor == sensor_specs.np) && id == (sensor_specs.args_count ? sensor_specs.args[0] : 0)) { - pr_debug("sensor %pOFn id=%d belongs to %pOFn\n", sensor, id, tz); + pr_debug("sensor %pOFn id=%d belongs to %pOFn\n", sensor, id, child); + tz = no_free_ptr(child); goto out; } } @@ -491,7 +492,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * trips = thermal_of_trips_init(np, &ntrips); if (IS_ERR(trips)) { pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id); - return ERR_CAST(trips); + ret = PTR_ERR(trips); + goto out_of_node_put; } ret = thermal_of_monitor_init(np, &delay, &pdelay); @@ -519,6 +521,7 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * goto out_kfree_trips; } + of_node_put(np); kfree(trips); ret = thermal_zone_device_enable(tz); @@ -533,6 +536,8 @@ static struct thermal_zone_device *thermal_of_zone_register(struct device_node * out_kfree_trips: kfree(trips); +out_of_node_put: + of_node_put(np); return ERR_PTR(ret); } diff --git a/drivers/thermal/thermal_trip.c b/drivers/thermal/thermal_trip.c index c0b679b846b3b90fd10738fd3c84226bb31f1f5a..06a0554ddc389156ccd51bbbc988bebaa47c0046 100644 --- a/drivers/thermal/thermal_trip.c +++ b/drivers/thermal/thermal_trip.c @@ -88,10 +88,10 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) return; for_each_trip_desc(tz, td) { - if (td->threshold < tz->temperature && td->threshold > low) + if (td->threshold <= tz->temperature && td->threshold > low) low = td->threshold; - if (td->threshold > tz->temperature && td->threshold < high) + if (td->threshold >= tz->temperature && td->threshold < high) high = td->threshold; } diff --git a/drivers/thunderbolt/debugfs.c b/drivers/thunderbolt/debugfs.c index 11185cc1db92994c5b2db3b1b21b3ed8848b4347..9ed4bb2e8d05fd2bb779c071612c6c16a45c19a5 100644 --- a/drivers/thunderbolt/debugfs.c +++ b/drivers/thunderbolt/debugfs.c @@ -323,16 +323,17 @@ static ssize_t port_sb_regs_write(struct file *file, const char __user *user_buf if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(port, port_sb_regs, ARRAY_SIZE(port_sb_regs), USB4_SB_TARGET_ROUTER, 0, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&sw->dev); pm_runtime_put_autosuspend(&sw->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } @@ -355,16 +356,17 @@ static ssize_t retimer_sb_regs_write(struct file *file, if (mutex_lock_interruptible(&tb->lock)) { ret = -ERESTARTSYS; - goto out_rpm_put; + goto out; } ret = sb_regs_write(rt->port, retimer_sb_regs, ARRAY_SIZE(retimer_sb_regs), USB4_SB_TARGET_RETIMER, rt->index, buf, count, ppos); mutex_unlock(&tb->lock); -out_rpm_put: +out: pm_runtime_mark_last_busy(&rt->dev); pm_runtime_put_autosuspend(&rt->dev); + free_page((unsigned long)buf); return ret < 0 ? ret : count; } diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c index 326433df5880e26cfef2bc4ce13fdbf37e6b79be..6a2116cbb06f92965028e01b51546903d6a0de56 100644 --- a/drivers/thunderbolt/switch.c +++ b/drivers/thunderbolt/switch.c @@ -3392,6 +3392,7 @@ void tb_switch_remove(struct tb_switch *sw) tb_switch_remove(port->remote->sw); port->remote = NULL; } else if (port->xdomain) { + port->xdomain->is_unplugged = true; tb_xdomain_remove(port->xdomain); port->xdomain = NULL; } diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c index 1af9aed99c6512096979498507680a8ac83b7544..afef1dd4ddf49c070efda75be10a1a2696e91413 100644 --- a/drivers/tty/serial/8250/8250_omap.c +++ b/drivers/tty/serial/8250/8250_omap.c @@ -27,7 +27,6 @@ #include #include #include -#include #include "8250.h" @@ -119,12 +118,6 @@ #define UART_OMAP_TO_L 0x26 #define UART_OMAP_TO_H 0x27 -/* - * Copy of the genpd flags for the console. - * Only used if console suspend is disabled - */ -static unsigned int genpd_flags_console; - struct omap8250_priv { void __iomem *membase; int line; @@ -1655,7 +1648,6 @@ static int omap8250_suspend(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err = 0; serial8250_suspend_port(priv->line); @@ -1666,19 +1658,8 @@ static int omap8250_suspend(struct device *dev) if (!device_may_wakeup(dev)) priv->wer = 0; serial_out(up, UART_OMAP_WER, priv->wer); - if (uart_console(&up->port)) { - if (console_suspend_enabled) - err = pm_runtime_force_suspend(dev); - else { - /* - * The pd shall not be powered-off (no console suspend). - * Make copy of genpd flags before to set it always on. - * The original value is restored during the resume. - */ - genpd_flags_console = genpd->flags; - genpd->flags |= GENPD_FLAG_ALWAYS_ON; - } - } + if (uart_console(&up->port) && console_suspend_enabled) + err = pm_runtime_force_suspend(dev); flush_work(&priv->qos_work); return err; @@ -1688,16 +1669,12 @@ static int omap8250_resume(struct device *dev) { struct omap8250_priv *priv = dev_get_drvdata(dev); struct uart_8250_port *up = serial8250_get_port(priv->line); - struct generic_pm_domain *genpd = pd_to_genpd(dev->pm_domain); int err; if (uart_console(&up->port) && console_suspend_enabled) { - if (console_suspend_enabled) { - err = pm_runtime_force_resume(dev); - if (err) - return err; - } else - genpd->flags = genpd_flags_console; + err = pm_runtime_force_resume(dev); + if (err) + return err; } serial8250_resume_port(priv->line); diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c index 0a90964d6d107f07415c89eb58ee87bf3fb63d71..09b246c9e389ecf791c818b180e216f99df7de26 100644 --- a/drivers/tty/serial/atmel_serial.c +++ b/drivers/tty/serial/atmel_serial.c @@ -2514,7 +2514,7 @@ static const struct uart_ops atmel_pops = { }; static const struct serial_rs485 atmel_rs485_supported = { - .flags = SER_RS485_ENABLED | SER_RS485_RTS_AFTER_SEND | SER_RS485_RX_DURING_TX, + .flags = SER_RS485_ENABLED | SER_RS485_RTS_ON_SEND | SER_RS485_RX_DURING_TX, .delay_rts_before_send = 1, .delay_rts_after_send = 1, }; diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c index 615291ea9b5e93b475e2133fd52ae8f448594b97..77efa7ee6eda29aa9a1d92740430327ffe416eb8 100644 --- a/drivers/tty/serial/fsl_lpuart.c +++ b/drivers/tty/serial/fsl_lpuart.c @@ -2923,6 +2923,7 @@ static int lpuart_probe(struct platform_device *pdev) pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT); pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); + pm_runtime_mark_last_busy(&pdev->dev); ret = lpuart_global_reset(sport); if (ret) diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c index c79dcd7c8d1a6032aed8ffc9bdaf7bd1623fd17a..b4c1798a1df2a079b4a398b7f8d38d45d3264578 100644 --- a/drivers/tty/serial/sc16is7xx.c +++ b/drivers/tty/serial/sc16is7xx.c @@ -327,6 +327,7 @@ struct sc16is7xx_one { struct kthread_work reg_work; struct kthread_delayed_work ms_work; struct sc16is7xx_one_config config; + unsigned char buf[SC16IS7XX_FIFO_SIZE]; /* Rx buffer. */ unsigned int old_mctrl; u8 old_lcr; /* Value before EFR access. */ bool irda_mode; @@ -340,7 +341,6 @@ struct sc16is7xx_port { unsigned long gpio_valid_mask; #endif u8 mctrl_mask; - unsigned char buf[SC16IS7XX_FIFO_SIZE]; struct kthread_worker kworker; struct task_struct *kworker_task; struct sc16is7xx_one p[]; @@ -592,6 +592,8 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) SC16IS7XX_MCR_CLKSEL_BIT, prescaler == 1 ? 0 : SC16IS7XX_MCR_CLKSEL_BIT); + mutex_lock(&one->efr_lock); + /* Backup LCR and access special register set (DLL/DLH) */ lcr = sc16is7xx_port_read(port, SC16IS7XX_LCR_REG); sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, @@ -606,24 +608,26 @@ static int sc16is7xx_set_baud(struct uart_port *port, int baud) /* Restore LCR and access to general register set */ sc16is7xx_port_write(port, SC16IS7XX_LCR_REG, lcr); + mutex_unlock(&one->efr_lock); + return DIV_ROUND_CLOSEST((clk / prescaler) / 16, div); } static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, unsigned int iir) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); + struct sc16is7xx_one *one = to_sc16is7xx_one(port, port); unsigned int lsr = 0, bytes_read, i; bool read_lsr = (iir == SC16IS7XX_IIR_RLSE_SRC) ? true : false; u8 ch, flag; - if (unlikely(rxlen >= sizeof(s->buf))) { + if (unlikely(rxlen >= sizeof(one->buf))) { dev_warn_ratelimited(port->dev, "ttySC%i: Possible RX FIFO overrun: %d\n", port->line, rxlen); port->icount.buf_overrun++; /* Ensure sanity of RX level */ - rxlen = sizeof(s->buf); + rxlen = sizeof(one->buf); } while (rxlen) { @@ -636,10 +640,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, lsr = 0; if (read_lsr) { - s->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); + one->buf[0] = sc16is7xx_port_read(port, SC16IS7XX_RHR_REG); bytes_read = 1; } else { - sc16is7xx_fifo_read(port, s->buf, rxlen); + sc16is7xx_fifo_read(port, one->buf, rxlen); bytes_read = rxlen; } @@ -672,7 +676,7 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, } for (i = 0; i < bytes_read; ++i) { - ch = s->buf[i]; + ch = one->buf[i]; if (uart_handle_sysrq_char(port, ch)) continue; @@ -690,10 +694,10 @@ static void sc16is7xx_handle_rx(struct uart_port *port, unsigned int rxlen, static void sc16is7xx_handle_tx(struct uart_port *port) { - struct sc16is7xx_port *s = dev_get_drvdata(port->dev); struct tty_port *tport = &port->state->port; unsigned long flags; unsigned int txlen; + unsigned char *tail; if (unlikely(port->x_char)) { sc16is7xx_port_write(port, SC16IS7XX_THR_REG, port->x_char); @@ -718,8 +722,9 @@ static void sc16is7xx_handle_tx(struct uart_port *port) txlen = 0; } - txlen = uart_fifo_out(port, s->buf, txlen); - sc16is7xx_fifo_write(port, s->buf, txlen); + txlen = kfifo_out_linear_ptr(&tport->xmit_fifo, &tail, txlen); + sc16is7xx_fifo_write(port, tail, txlen); + uart_xmit_advance(port, txlen); uart_port_lock_irqsave(port, &flags); if (kfifo_len(&tport->xmit_fifo) < WAKEUP_CHARS) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 9a18d0b95a41909d2dfdc758ae464b611aecf4d7..5bea3af46abcef688d72f1bcb19074a53003f0ec 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -881,6 +881,14 @@ static int uart_set_info(struct tty_struct *tty, struct tty_port *port, new_flags = (__force upf_t)new_info->flags; old_custom_divisor = uport->custom_divisor; + if (!(uport->flags & UPF_FIXED_PORT)) { + unsigned int uartclk = new_info->baud_base * 16; + /* check needs to be done here before other settings made */ + if (uartclk == 0) { + retval = -EINVAL; + goto exit; + } + } if (!capable(CAP_SYS_ADMIN)) { retval = -EPERM; if (change_irq || change_port || diff --git a/drivers/tty/vt/conmakehash.c b/drivers/tty/vt/conmakehash.c index dc2177fec7156e5f10e65581f34be64077af620e..a931fcde7ad98d1e9703037ecb0a71761f4ffb5d 100644 --- a/drivers/tty/vt/conmakehash.c +++ b/drivers/tty/vt/conmakehash.c @@ -76,8 +76,7 @@ static void addpair(int fp, int un) int main(int argc, char *argv[]) { FILE *ctbl; - const char *tblname, *rel_tblname; - const char *abs_srctree; + const char *tblname; char buffer[65536]; int fontlen; int i, nuni, nent; @@ -102,16 +101,6 @@ int main(int argc, char *argv[]) } } - abs_srctree = getenv("abs_srctree"); - if (abs_srctree && !strncmp(abs_srctree, tblname, strlen(abs_srctree))) - { - rel_tblname = tblname + strlen(abs_srctree); - while (*rel_tblname == '/') - ++rel_tblname; - } - else - rel_tblname = tblname; - /* For now we assume the default font is always 256 characters. */ fontlen = 256; @@ -255,16 +244,13 @@ int main(int argc, char *argv[]) printf("\ /*\n\ - * Do not edit this file; it was automatically generated by\n\ - *\n\ - * conmakehash %s > [this file]\n\ - *\n\ + * Automatically generated file; Do not edit.\n\ */\n\ \n\ #include \n\ \n\ u8 dfont_unicount[%d] = \n\ -{\n\t", rel_tblname, fontlen); +{\n\t", fontlen); for ( i = 0 ; i < fontlen ; i++ ) { diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h index ce36154ce9638aa616a392ce19356e774e0effae..7aea8fbaeee8822290686567401e502e9365e853 100644 --- a/drivers/ufs/core/ufshcd-priv.h +++ b/drivers/ufs/core/ufshcd-priv.h @@ -316,6 +316,11 @@ static inline int ufshcd_rpm_get_sync(struct ufs_hba *hba) return pm_runtime_get_sync(&hba->ufs_device_wlun->sdev_gendev); } +static inline int ufshcd_rpm_get_if_active(struct ufs_hba *hba) +{ + return pm_runtime_get_if_active(&hba->ufs_device_wlun->sdev_gendev); +} + static inline int ufshcd_rpm_put_sync(struct ufs_hba *hba) { return pm_runtime_put_sync(&hba->ufs_device_wlun->sdev_gendev); diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index dc757ba47522b0997815e4159366282c7798076a..a6f818cdef0e7752169cffe46e454113351b0eb2 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -2416,7 +2416,21 @@ static inline int ufshcd_hba_capabilities(struct ufs_hba *hba) return err; } + /* + * The UFSHCI 3.0 specification does not define MCQ_SUPPORT and + * LSDB_SUPPORT, but [31:29] as reserved bits with reset value 0s, which + * means we can simply read values regardless of version. + */ hba->mcq_sup = FIELD_GET(MASK_MCQ_SUPPORT, hba->capabilities); + /* + * 0h: legacy single doorbell support is available + * 1h: indicate that legacy single doorbell support has been removed + */ + if (!(hba->quirks & UFSHCD_QUIRK_BROKEN_LSDBS_CAP)) + hba->lsdb_sup = !FIELD_GET(MASK_LSDB_SUPPORT, hba->capabilities); + else + hba->lsdb_sup = true; + if (!hba->mcq_sup) return 0; @@ -4090,11 +4104,16 @@ static inline void ufshcd_add_delay_before_dme_cmd(struct ufs_hba *hba) min_sleep_time_us = MIN_DELAY_BEFORE_DME_CMDS_US - delta; else - return; /* no more delay required */ + min_sleep_time_us = 0; /* no more delay required */ + } + + if (min_sleep_time_us > 0) { + /* allow sleep for extra 50us if needed */ + usleep_range(min_sleep_time_us, min_sleep_time_us + 50); } - /* allow sleep for extra 50us if needed */ - usleep_range(min_sleep_time_us, min_sleep_time_us + 50); + /* update the last_dme_cmd_tstamp */ + hba->last_dme_cmd_tstamp = ktime_get(); } /** @@ -6553,7 +6572,8 @@ again: if (ufshcd_err_handling_should_stop(hba)) goto skip_err_handling; - if (hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) { + if ((hba->dev_quirks & UFS_DEVICE_QUIRK_RECOVERY_FROM_DL_NAC_ERRORS) && + !hba->force_reset) { bool ret; spin_unlock_irqrestore(hba->host->host_lock, flags); @@ -8211,7 +8231,10 @@ static void ufshcd_update_rtc(struct ufs_hba *hba) */ val = ts64.tv_sec - hba->dev_info.rtc_time_baseline; - ufshcd_rpm_get_sync(hba); + /* Skip update RTC if RPM state is not RPM_ACTIVE */ + if (ufshcd_rpm_get_if_active(hba) <= 0) + return; + err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_SECONDS_PASSED, 0, 0, &val); ufshcd_rpm_put_sync(hba); @@ -10265,9 +10288,6 @@ int ufshcd_system_restore(struct device *dev) */ ufshcd_readl(hba, REG_UTP_TASK_REQ_LIST_BASE_H); - /* Resuming from hibernate, assume that link was OFF */ - ufshcd_set_link_off(hba); - return 0; } @@ -10496,6 +10516,12 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) } if (!is_mcq_supported(hba)) { + if (!hba->lsdb_sup) { + dev_err(hba->dev, "%s: failed to initialize (legacy doorbell mode not supported)\n", + __func__); + err = -EINVAL; + goto out_disable; + } err = scsi_add_host(host, hba->dev); if (err) { dev_err(hba->dev, "scsi_add_host failed\n"); diff --git a/drivers/ufs/host/ufs-exynos.c b/drivers/ufs/host/ufs-exynos.c index 16ad3528d80b2315e8b3ad82d0c1eafff9f9829e..9ec318ef52bff2dbc897ecb1268793ea14c5e2ce 100644 --- a/drivers/ufs/host/ufs-exynos.c +++ b/drivers/ufs/host/ufs-exynos.c @@ -1293,6 +1293,9 @@ static void exynos_ufs_fmp_resume(struct ufs_hba *hba) { struct arm_smccc_res res; + if (!(hba->caps & UFSHCD_CAP_CRYPTO)) + return; + arm_smccc_smc(SMC_CMD_FMP_SECURITY, 0, SMU_EMBEDDED, CFG_DESCTYPE_3, 0, 0, 0, 0, &res); if (res.a0) diff --git a/drivers/ufs/host/ufs-qcom.c b/drivers/ufs/host/ufs-qcom.c index 810e637047d04b26a59b235d769cd67faae13650..c87fdc849c627e07374d76f37a2ec52fa4555bbb 100644 --- a/drivers/ufs/host/ufs-qcom.c +++ b/drivers/ufs/host/ufs-qcom.c @@ -857,6 +857,9 @@ static void ufs_qcom_advertise_quirks(struct ufs_hba *hba) if (host->hw_ver.major > 0x3) hba->quirks |= UFSHCD_QUIRK_REINIT_AFTER_MAX_GEAR_SWITCH; + + if (of_device_is_compatible(hba->dev->of_node, "qcom,sm8550-ufshc")) + hba->quirks |= UFSHCD_QUIRK_BROKEN_LSDBS_CAP; } static void ufs_qcom_set_phy_gear(struct ufs_qcom_host *host) @@ -1847,7 +1850,8 @@ static void ufs_qcom_remove(struct platform_device *pdev) } static const struct of_device_id ufs_qcom_of_match[] __maybe_unused = { - { .compatible = "qcom,ufshc"}, + { .compatible = "qcom,ufshc" }, + { .compatible = "qcom,sm8550-ufshc" }, {}, }; MODULE_DEVICE_TABLE(of, ufs_qcom_of_match); diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index d8b096859337951f69dacaa14f72044f8faa9a20..e0ceaa72194937cdac8ca91de55b9d3c230b0108 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -3734,11 +3734,9 @@ static int ffs_func_set_alt(struct usb_function *f, if (alt > MAX_ALT_SETTINGS) return -EINVAL; - if (alt != (unsigned)-1) { - intf = ffs_func_revmap_intf(func, interface); - if (intf < 0) - return intf; - } + intf = ffs_func_revmap_intf(func, interface); + if (intf < 0) + return intf; if (ffs->func) ffs_func_eps_disable(ffs->func); @@ -3753,12 +3751,6 @@ static int ffs_func_set_alt(struct usb_function *f, if (ffs->state != FFS_ACTIVE) return -ENODEV; - if (alt == (unsigned)-1) { - ffs->func = NULL; - ffs_event_add(ffs, FUNCTIONFS_DISABLE); - return 0; - } - ffs->func = func; ret = ffs_func_eps_enable(func); if (ret >= 0) { @@ -3770,7 +3762,23 @@ static int ffs_func_set_alt(struct usb_function *f, static void ffs_func_disable(struct usb_function *f) { - ffs_func_set_alt(f, 0, (unsigned)-1); + struct ffs_function *func = ffs_func_from_usb(f); + struct ffs_data *ffs = func->ffs; + + if (ffs->func) + ffs_func_eps_disable(ffs->func); + + if (ffs->state == FFS_DEACTIVATED) { + ffs->state = FFS_CLOSING; + INIT_WORK(&ffs->reset_work, ffs_reset_work); + schedule_work(&ffs->reset_work); + return; + } + + if (ffs->state == FFS_ACTIVE) { + ffs->func = NULL; + ffs_event_add(ffs, FUNCTIONFS_DISABLE); + } } static int ffs_func_setup(struct usb_function *f, diff --git a/drivers/usb/gadget/function/f_midi2.c b/drivers/usb/gadget/function/f_midi2.c index 38e8ed3144f0603ce69cb33fa488a36ebdb4e388..3f63253ad3e074e7baf7f95d07173e627893e6cc 100644 --- a/drivers/usb/gadget/function/f_midi2.c +++ b/drivers/usb/gadget/function/f_midi2.c @@ -642,12 +642,21 @@ static void process_ump_stream_msg(struct f_midi2_ep *ep, const u32 *data) if (format) return; // invalid blk = (*data >> 8) & 0xff; - if (blk >= ep->num_blks) - return; - if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) - reply_ump_stream_fb_info(ep, blk); - if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) - reply_ump_stream_fb_name(ep, blk); + if (blk == 0xff) { + /* inquiry for all blocks */ + for (blk = 0; blk < ep->num_blks; blk++) { + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } + } else if (blk < ep->num_blks) { + /* only the specified block */ + if (*data & UMP_STREAM_MSG_REQUEST_FB_INFO) + reply_ump_stream_fb_info(ep, blk); + if (*data & UMP_STREAM_MSG_REQUEST_FB_NAME) + reply_ump_stream_fb_name(ep, blk); + } return; } } diff --git a/drivers/usb/gadget/function/u_audio.c b/drivers/usb/gadget/function/u_audio.c index 89af0feb75120864665a7c8004dfd7e66cb010f0..24299576972fe1d758bf1e9f39c63a0c3eb1fe3e 100644 --- a/drivers/usb/gadget/function/u_audio.c +++ b/drivers/usb/gadget/function/u_audio.c @@ -592,16 +592,25 @@ int u_audio_start_capture(struct g_audio *audio_dev) struct usb_ep *ep, *ep_fback; struct uac_rtd_params *prm; struct uac_params *params = &audio_dev->params; - int req_len, i; + int req_len, i, ret; prm = &uac->c_prm; dev_dbg(dev, "start capture with rate %d\n", prm->srate); ep = audio_dev->out_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for out_ep failed (%d)\n", ret); + return ret; + } + req_len = ep->maxpacket; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for out_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { @@ -629,9 +638,18 @@ int u_audio_start_capture(struct g_audio *audio_dev) return 0; /* Setup feedback endpoint */ - config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep_fback); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed in_ep_fback failed (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } + prm->fb_ep_enabled = true; - usb_ep_enable(ep_fback); + ret = usb_ep_enable(ep_fback); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep_fback (%d)\n", ret); + return ret; // TODO: Clean up out_ep + } req_len = ep_fback->maxpacket; req_fback = usb_ep_alloc_request(ep_fback, GFP_ATOMIC); @@ -687,13 +705,17 @@ int u_audio_start_playback(struct g_audio *audio_dev) struct uac_params *params = &audio_dev->params; unsigned int factor; const struct usb_endpoint_descriptor *ep_desc; - int req_len, i; + int req_len, i, ret; unsigned int p_pktsize; prm = &uac->p_prm; dev_dbg(dev, "start playback with rate %d\n", prm->srate); ep = audio_dev->in_ep; - config_ep_by_speed(gadget, &audio_dev->func, ep); + ret = config_ep_by_speed(gadget, &audio_dev->func, ep); + if (ret < 0) { + dev_err(dev, "config_ep_by_speed for in_ep failed (%d)\n", ret); + return ret; + } ep_desc = ep->desc; /* @@ -720,7 +742,11 @@ int u_audio_start_playback(struct g_audio *audio_dev) uac->p_residue_mil = 0; prm->ep_enabled = true; - usb_ep_enable(ep); + ret = usb_ep_enable(ep); + if (ret < 0) { + dev_err(dev, "usb_ep_enable failed for in_ep (%d)\n", ret); + return ret; + } for (i = 0; i < params->req_number; i++) { if (!prm->reqs[i]) { diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c index eec7f7a2e40f08bb2e002c9014a21136439d756c..b394105e55d6cc783c8dc93bfa90e991d122519b 100644 --- a/drivers/usb/gadget/function/u_serial.c +++ b/drivers/usb/gadget/function/u_serial.c @@ -1441,6 +1441,7 @@ void gserial_suspend(struct gserial *gser) spin_lock(&port->port_lock); spin_unlock(&serial_port_lock); port->suspended = true; + port->start_delayed = true; spin_unlock_irqrestore(&port->port_lock, flags); } EXPORT_SYMBOL_GPL(gserial_suspend); diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c index b0a6137584141cc13559304a911028312a542453..cf6478f97f4a3db1784d016a8b0d2610bd2891ad 100644 --- a/drivers/usb/gadget/udc/core.c +++ b/drivers/usb/gadget/udc/core.c @@ -118,12 +118,10 @@ int usb_ep_enable(struct usb_ep *ep) goto out; /* UDC drivers can't handle endpoints with maxpacket size 0 */ - if (usb_endpoint_maxp(ep->desc) == 0) { - /* - * We should log an error message here, but we can't call - * dev_err() because there's no way to find the gadget - * given only ep. - */ + if (!ep->desc || usb_endpoint_maxp(ep->desc) == 0) { + WARN_ONCE(1, "%s: ep%d (%s) has %s\n", __func__, ep->address, ep->name, + (!ep->desc) ? "NULL descriptor" : "maxpacket 0"); + ret = -EINVAL; goto out; } diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c index d7654f475dafbf45f82577fed04368b4dae0f067..937ce5fd58095b2efbd610d9453ab52541366af7 100644 --- a/drivers/usb/host/xhci-mem.c +++ b/drivers/usb/host/xhci-mem.c @@ -1872,7 +1872,7 @@ void xhci_mem_cleanup(struct xhci_hcd *xhci) cancel_delayed_work_sync(&xhci->cmd_timer); - for (i = 0; i < xhci->max_interrupters; i++) { + for (i = 0; xhci->interrupters && i < xhci->max_interrupters; i++) { if (xhci->interrupters[i]) { xhci_remove_interrupter(xhci, xhci->interrupters[i]); xhci_free_interrupter(xhci, xhci->interrupters[i]); diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index b7517c3c8059f4afb00e270bcbb288a67fdcffab..4ea2c3e072a9e3d6f77e48f6154c3a3f9bb9a848 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2910,6 +2910,7 @@ static int handle_tx_event(struct xhci_hcd *xhci, process_isoc_td(xhci, ep, ep_ring, td, ep_trb, event); else process_bulk_intr_td(xhci, ep, ep_ring, td, ep_trb, event); + return 0; check_endpoint_halted: if (xhci_halted_host_endpoint(ep_ctx, trb_comp_code)) diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 0a8cf6c17f827adb28c74b3ac80d4e013748a488..efdf4c228b8c0ace31ac8d956fbb1466971fe976 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -2837,7 +2837,7 @@ static int xhci_configure_endpoint(struct xhci_hcd *xhci, xhci->num_active_eps); return -ENOMEM; } - if ((xhci->quirks & XHCI_SW_BW_CHECKING) && + if ((xhci->quirks & XHCI_SW_BW_CHECKING) && !ctx_change && xhci_reserve_bandwidth(xhci, virt_dev, command->in_ctx)) { if ((xhci->quirks & XHCI_EP_LIMIT_QUIRK)) xhci_free_host_resources(xhci, ctrl_ctx); @@ -4200,8 +4200,10 @@ static int xhci_setup_device(struct usb_hcd *hcd, struct usb_device *udev, mutex_unlock(&xhci->mutex); ret = xhci_disable_slot(xhci, udev->slot_id); xhci_free_virt_device(xhci, udev->slot_id); - if (!ret) - xhci_alloc_dev(hcd, udev); + if (!ret) { + if (xhci_alloc_dev(hcd, udev) == 1) + xhci_setup_addressable_virt_dev(xhci, udev); + } kfree(command->completion); kfree(command); return -EPROTO; diff --git a/drivers/usb/misc/usb-ljca.c b/drivers/usb/misc/usb-ljca.c index 2d30fc1be30669c81c7208a50ae5b4fa8f841fca..1a8d5e80b9aec2daf3977fee7c0d873efaea6ba7 100644 --- a/drivers/usb/misc/usb-ljca.c +++ b/drivers/usb/misc/usb-ljca.c @@ -169,6 +169,7 @@ static const struct acpi_device_id ljca_gpio_hids[] = { { "INTC1096" }, { "INTC100B" }, { "INTC10D1" }, + { "INTC10B5" }, {}, }; diff --git a/drivers/usb/serial/ch341.c b/drivers/usb/serial/ch341.c index 612bea504d7a877aefcb2cf8413112ed7371183b..0870c6533f8012055a27082df59c4a2e4c5f79fa 100644 --- a/drivers/usb/serial/ch341.c +++ b/drivers/usb/serial/ch341.c @@ -863,4 +863,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Winchiphead CH341 USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c index 670e942fdaaa18d93b223f23640dbef5594da22e..6d6ec7eed87c921218670bb2c68b18677b15af2d 100644 --- a/drivers/usb/serial/garmin_gps.c +++ b/drivers/usb/serial/garmin_gps.c @@ -104,7 +104,7 @@ struct garmin_packet { int seq; /* the real size of the data array, always > 0 */ int size; - __u8 data[]; + __u8 data[] __counted_by(size); }; /* structure used to keep the current state of the driver */ @@ -267,8 +267,7 @@ static int pkt_add(struct garmin_data *garmin_data_p, /* process only packets containing data ... */ if (data_length) { - pkt = kmalloc(sizeof(struct garmin_packet)+data_length, - GFP_ATOMIC); + pkt = kmalloc(struct_size(pkt, data, data_length), GFP_ATOMIC); if (!pkt) return 0; diff --git a/drivers/usb/serial/mxuport.c b/drivers/usb/serial/mxuport.c index 1f7bb3e4fcf22d1d11ee44f6f1dcbb609110bb6c..942cb01534232e91d0a0d0cc34e77edcd51d5621 100644 --- a/drivers/usb/serial/mxuport.c +++ b/drivers/usb/serial/mxuport.c @@ -1315,4 +1315,5 @@ module_usb_serial_driver(serial_drivers, mxuport_idtable); MODULE_AUTHOR("Andrew Lunn "); MODULE_AUTHOR(""); +MODULE_DESCRIPTION("Moxa UPORT USB Serial driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/usb/serial/navman.c b/drivers/usb/serial/navman.c index 20277c52dded6b117f296bd7ee5e12a59155d2f5..82791fd67c468ecaa7f2df82e470f0035f3229ec 100644 --- a/drivers/usb/serial/navman.c +++ b/drivers/usb/serial/navman.c @@ -112,4 +112,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Navman USB Serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/qcaux.c b/drivers/usb/serial/qcaux.c index 929ffba663f2035fbaf1f46c8bb3de9495fa4daa..015bb7c5d19d3bda50cdeb9535ea220e562577aa 100644 --- a/drivers/usb/serial/qcaux.c +++ b/drivers/usb/serial/qcaux.c @@ -84,4 +84,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Qualcomm USB Auxiliary Serial Port driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/spcp8x5.c b/drivers/usb/serial/spcp8x5.c index 09a972a838ee4c18b85f10008af2a08ab69342fe..6b294bf8bc435fd539a147b934297d45cd7c6adb 100644 --- a/drivers/usb/serial/spcp8x5.c +++ b/drivers/usb/serial/spcp8x5.c @@ -49,16 +49,6 @@ static const struct usb_device_id id_table[] = { }; MODULE_DEVICE_TABLE(usb, id_table); -struct spcp8x5_usb_ctrl_arg { - u8 type; - u8 cmd; - u8 cmd_type; - u16 value; - u16 index; - u16 length; -}; - - /* spcp8x5 spec register define */ #define MCR_CONTROL_LINE_RTS 0x02 #define MCR_CONTROL_LINE_DTR 0x01 diff --git a/drivers/usb/serial/symbolserial.c b/drivers/usb/serial/symbolserial.c index d7f73ad6e778f9997ce47bc5237e4c25dc409adb..9aabb087f733cb03a119b58706d42782ed166001 100644 --- a/drivers/usb/serial/symbolserial.c +++ b/drivers/usb/serial/symbolserial.c @@ -190,4 +190,5 @@ static struct usb_serial_driver * const serial_drivers[] = { module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("Symbol USB barcode to serial driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb-serial-simple.c b/drivers/usb/serial/usb-serial-simple.c index 24b8772a345e2f898dcc345b9f01582b1a87e76a..82f4f0b992aaa76d387df54d915b895f6aea0699 100644 --- a/drivers/usb/serial/usb-serial-simple.c +++ b/drivers/usb/serial/usb-serial-simple.c @@ -163,4 +163,5 @@ static const struct usb_device_id id_table[] = { MODULE_DEVICE_TABLE(usb, id_table); module_usb_serial_driver(serial_drivers, id_table); +MODULE_DESCRIPTION("USB Serial 'Simple' driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c index 6934970f180d7fd32249da35d8c380a3a64b9463..61a8425b7762a471fb642039b1156d87a801aea4 100644 --- a/drivers/usb/serial/usb_debug.c +++ b/drivers/usb/serial/usb_debug.c @@ -76,6 +76,11 @@ static void usb_debug_process_read_urb(struct urb *urb) usb_serial_generic_process_read_urb(urb); } +static void usb_debug_init_termios(struct tty_struct *tty) +{ + tty->termios.c_lflag &= ~(ECHO | ECHONL); +} + static struct usb_serial_driver debug_device = { .driver = { .owner = THIS_MODULE, @@ -85,6 +90,7 @@ static struct usb_serial_driver debug_device = { .num_ports = 1, .bulk_out_size = USB_DEBUG_MAX_PACKET_SIZE, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; @@ -96,6 +102,7 @@ static struct usb_serial_driver dbc_device = { .id_table = dbc_id_table, .num_ports = 1, .break_ctl = usb_debug_break_ctl, + .init_termios = usb_debug_init_termios, .process_read_urb = usb_debug_process_read_urb, }; @@ -104,4 +111,5 @@ static struct usb_serial_driver * const serial_drivers[] = { }; module_usb_serial_driver(serial_drivers, id_table_combined); +MODULE_DESCRIPTION("USB Debug cable driver"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/usb/typec/mux/fsa4480.c b/drivers/usb/typec/mux/fsa4480.c index cb7cdf90cb0aaa4184602ed2dab3f4e4382901ea..cd235339834b0b8aa0492e9eb194e283dc5b0958 100644 --- a/drivers/usb/typec/mux/fsa4480.c +++ b/drivers/usb/typec/mux/fsa4480.c @@ -13,6 +13,10 @@ #include #include +#define FSA4480_DEVICE_ID 0x00 + #define FSA4480_DEVICE_ID_VENDOR_ID GENMASK(7, 6) + #define FSA4480_DEVICE_ID_VERSION_ID GENMASK(5, 3) + #define FSA4480_DEVICE_ID_REV_ID GENMASK(2, 0) #define FSA4480_SWITCH_ENABLE 0x04 #define FSA4480_SWITCH_SELECT 0x05 #define FSA4480_SWITCH_STATUS1 0x07 @@ -251,6 +255,7 @@ static int fsa4480_probe(struct i2c_client *client) struct typec_switch_desc sw_desc = { }; struct typec_mux_desc mux_desc = { }; struct fsa4480 *fsa; + int val = 0; int ret; fsa = devm_kzalloc(dev, sizeof(*fsa), GFP_KERNEL); @@ -268,6 +273,15 @@ static int fsa4480_probe(struct i2c_client *client) if (IS_ERR(fsa->regmap)) return dev_err_probe(dev, PTR_ERR(fsa->regmap), "failed to initialize regmap\n"); + ret = regmap_read(fsa->regmap, FSA4480_DEVICE_ID, &val); + if (ret || !val) + return dev_err_probe(dev, -ENODEV, "FSA4480 not found\n"); + + dev_dbg(dev, "Found FSA4480 v%lu.%lu (Vendor ID = %lu)\n", + FIELD_GET(FSA4480_DEVICE_ID_VERSION_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_REV_ID, val), + FIELD_GET(FSA4480_DEVICE_ID_VENDOR_ID, val)); + /* Safe mode */ fsa->cur_enable = FSA4480_ENABLE_DEVICE | FSA4480_ENABLE_USB; fsa->mode = TYPEC_STATE_SAFE; diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c index b862fdf3fe1d4c6c35ec60ad5eae8af84a002cad..3e3dcb983ddeabbd457294788bcb9e0a5453da2f 100644 --- a/drivers/usb/typec/tcpm/tcpci.c +++ b/drivers/usb/typec/tcpm/tcpci.c @@ -67,7 +67,7 @@ static int tcpci_write16(struct tcpci *tcpci, unsigned int reg, u16 val) return regmap_raw_write(tcpci->regmap, reg, &val, sizeof(u16)); } -static bool tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) +static int tcpci_check_std_output_cap(struct regmap *regmap, u8 mask) { unsigned int reg; int ret; diff --git a/drivers/usb/typec/tcpm/tcpm.c b/drivers/usb/typec/tcpm/tcpm.c index 26f9006e95e169c2dca496cfe9a13753075e4562..4b02d6474259109c3ccfe662c96434f0d1ad55c2 100644 --- a/drivers/usb/typec/tcpm/tcpm.c +++ b/drivers/usb/typec/tcpm/tcpm.c @@ -4515,7 +4515,7 @@ static inline enum tcpm_state hard_reset_state(struct tcpm_port *port) return ERROR_RECOVERY; if (port->pwr_role == TYPEC_SOURCE) return SRC_UNATTACHED; - if (port->state == SNK_WAIT_CAPABILITIES) + if (port->state == SNK_WAIT_CAPABILITIES_TIMEOUT) return SNK_READY; return SNK_UNATTACHED; } @@ -5655,7 +5655,6 @@ static void run_state_machine(struct tcpm_port *port) break; case PORT_RESET: tcpm_reset_port(port); - port->pd_events = 0; if (port->self_powered) tcpm_set_cc(port, TYPEC_CC_OPEN); else diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c index ea768b19a7f1ec1da902b07ec447668b2bd4849d..dd51a25480bfb90572976a4b3ecbdc1291790a78 100644 --- a/drivers/usb/typec/tipd/core.c +++ b/drivers/usb/typec/tipd/core.c @@ -1191,14 +1191,14 @@ static int tps6598x_apply_patch(struct tps6598x *tps) dev_info(tps->dev, "Firmware update succeeded\n"); release_fw: - release_firmware(fw); if (ret) { dev_err(tps->dev, "Failed to write patch %s of %zu bytes\n", firmware_name, fw->size); } + release_firmware(fw); return ret; -}; +} static int cd321x_init(struct tps6598x *tps) { diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c index dcd3765cc1f532242af91ad1af5389b63adb87f0..4039851551c1b7251b5ae5ec1402092b55575073 100644 --- a/drivers/usb/typec/ucsi/ucsi.c +++ b/drivers/usb/typec/ucsi/ucsi.c @@ -137,7 +137,7 @@ static int ucsi_run_command(struct ucsi *ucsi, u64 command, u32 *cci, if (ret) return ret; - return err; + return err ?: UCSI_CCI_LENGTH(*cci); } static int ucsi_read_error(struct ucsi *ucsi, u8 connector_num) @@ -238,13 +238,10 @@ static int ucsi_send_command_common(struct ucsi *ucsi, u64 cmd, mutex_lock(&ucsi->ppm_lock); ret = ucsi_run_command(ucsi, cmd, &cci, data, size, conn_ack); - if (cci & UCSI_CCI_BUSY) { - ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false); - return ret ? ret : -EBUSY; - } - - if (cci & UCSI_CCI_ERROR) - return ucsi_read_error(ucsi, connector_num); + if (cci & UCSI_CCI_BUSY) + ret = ucsi_run_command(ucsi, UCSI_CANCEL, &cci, NULL, 0, false) ?: -EBUSY; + else if (cci & UCSI_CCI_ERROR) + ret = ucsi_read_error(ucsi, connector_num); mutex_unlock(&ucsi->ppm_lock); return ret; diff --git a/drivers/usb/usbip/vhci_hcd.c b/drivers/usb/usbip/vhci_hcd.c index 82650c11e4516125777dc55e4719e398fbbb644c..302a89aeb258a54211406ceb89cc89fdcbcf5519 100644 --- a/drivers/usb/usbip/vhci_hcd.c +++ b/drivers/usb/usbip/vhci_hcd.c @@ -745,6 +745,7 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag * */ if (usb_pipedevice(urb->pipe) == 0) { + struct usb_device *old; __u8 type = usb_pipetype(urb->pipe); struct usb_ctrlrequest *ctrlreq = (struct usb_ctrlrequest *) urb->setup_packet; @@ -755,14 +756,15 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag goto no_need_xmit; } + old = vdev->udev; switch (ctrlreq->bRequest) { case USB_REQ_SET_ADDRESS: /* set_address may come when a device is reset */ dev_info(dev, "SetAddress Request (%d) to port %d\n", ctrlreq->wValue, vdev->rhport); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); spin_lock(&vdev->ud.lock); vdev->ud.status = VDEV_ST_USED; @@ -781,8 +783,8 @@ static int vhci_urb_enqueue(struct usb_hcd *hcd, struct urb *urb, gfp_t mem_flag usbip_dbg_vhci_hc( "Not yet?:Get_Descriptor to device 0 (get max pipe size)\n"); - usb_put_dev(vdev->udev); vdev->udev = usb_get_dev(urb->dev); + usb_put_dev(old); goto out; default: @@ -1067,6 +1069,7 @@ static void vhci_shutdown_connection(struct usbip_device *ud) static void vhci_device_reset(struct usbip_device *ud) { struct vhci_device *vdev = container_of(ud, struct vhci_device, ud); + struct usb_device *old = vdev->udev; unsigned long flags; spin_lock_irqsave(&ud->lock, flags); @@ -1074,8 +1077,8 @@ static void vhci_device_reset(struct usbip_device *ud) vdev->speed = 0; vdev->devid = 0; - usb_put_dev(vdev->udev); vdev->udev = NULL; + usb_put_dev(old); if (ud->tcp_socket) { sockfd_put(ud->tcp_socket); diff --git a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c index 7fa0491bb201eb320faa5e947b55035642af0112..11bd76ae18cf937c5dd25ec7c000abe7a4b5c130 100644 --- a/drivers/vdpa/octeon_ep/octep_vdpa_hw.c +++ b/drivers/vdpa/octeon_ep/octep_vdpa_hw.c @@ -140,7 +140,7 @@ static int octep_process_mbox(struct octep_hw *oct_hw, u16 id, u16 qid, void *bu val = octep_read_sig(mbox); if ((val & 0xFFFF) != MBOX_RSP_SIG) { dev_warn(&pdev->dev, "Invalid Signature from mbox : %d response\n", id); - return ret; + return -EINVAL; } val = octep_read_sts(mbox); diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c index e31ec9ebc4ce842accc0ac2be8feb1a237c6e126..478cd46a49ede2cec23e38d6845e1e710dfa1bab 100644 --- a/drivers/vhost/vdpa.c +++ b/drivers/vhost/vdpa.c @@ -1481,13 +1481,7 @@ static vm_fault_t vhost_vdpa_fault(struct vm_fault *vmf) notify = ops->get_vq_notification(vdpa, index); - vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - if (remap_pfn_range(vma, vmf->address & PAGE_MASK, - PFN_DOWN(notify.addr), PAGE_SIZE, - vma->vm_page_prot)) - return VM_FAULT_SIGBUS; - - return VM_FAULT_NOPAGE; + return vmf_insert_pfn(vma, vmf->address & PAGE_MASK, PFN_DOWN(notify.addr)); } static const struct vm_operations_struct vhost_vdpa_vm_ops = { diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c index a9b93e99c23a411d66506d8d4a7b5d9547565962..bc1f962e483b985a2d9d94c993d32ba6126f1f44 100644 --- a/drivers/virtio/virtio.c +++ b/drivers/virtio/virtio.c @@ -305,15 +305,9 @@ static int virtio_dev_probe(struct device *_d) if (err) goto err; - if (dev->config->create_avq) { - err = dev->config->create_avq(dev); - if (err) - goto err; - } - err = drv->probe(dev); if (err) - goto err_probe; + goto err; /* If probe didn't do it, mark device DRIVER_OK ourselves. */ if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK)) @@ -326,9 +320,6 @@ static int virtio_dev_probe(struct device *_d) return 0; -err_probe: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return err; @@ -344,9 +335,6 @@ static void virtio_dev_remove(struct device *_d) drv->remove(dev); - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - /* Driver should have reset device. */ WARN_ON_ONCE(dev->config->get_status(dev)); @@ -524,9 +512,6 @@ int virtio_device_freeze(struct virtio_device *dev) } } - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); - return 0; } EXPORT_SYMBOL_GPL(virtio_device_freeze); @@ -562,16 +547,10 @@ int virtio_device_restore(struct virtio_device *dev) if (ret) goto err; - if (dev->config->create_avq) { - ret = dev->config->create_avq(dev); - if (ret) - goto err; - } - if (drv->restore) { ret = drv->restore(dev); if (ret) - goto err_restore; + goto err; } /* If restore didn't do it, mark device DRIVER_OK ourselves. */ @@ -582,9 +561,6 @@ int virtio_device_restore(struct virtio_device *dev) return 0; -err_restore: - if (dev->config->destroy_avq) - dev->config->destroy_avq(dev); err: virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED); return ret; diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c index 7d82facafd75ee98f8867e4d8b16ad5052da4eb2..c44d8ba00c02c38f9d641b8ccf423efbf86ea83b 100644 --- a/drivers/virtio/virtio_pci_common.c +++ b/drivers/virtio/virtio_pci_common.c @@ -46,12 +46,26 @@ bool vp_notify(struct virtqueue *vq) return true; } +/* Notify all slow path virtqueues on an interrupt. */ +static void vp_vring_slow_path_interrupt(int irq, + struct virtio_pci_device *vp_dev) +{ + struct virtio_pci_vq_info *info; + unsigned long flags; + + spin_lock_irqsave(&vp_dev->lock, flags); + list_for_each_entry(info, &vp_dev->slow_virtqueues, node) + vring_interrupt(irq, info->vq); + spin_unlock_irqrestore(&vp_dev->lock, flags); +} + /* Handle a configuration change: Tell driver if it wants to know. */ static irqreturn_t vp_config_changed(int irq, void *opaque) { struct virtio_pci_device *vp_dev = opaque; virtio_config_changed(&vp_dev->vdev); + vp_vring_slow_path_interrupt(irq, vp_dev); return IRQ_HANDLED; } @@ -125,6 +139,9 @@ static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors, GFP_KERNEL)) goto error; + if (!per_vq_vectors) + desc = NULL; + if (desc) { flags |= PCI_IRQ_AFFINITY; desc->pre_vectors++; /* virtio config vector */ @@ -171,11 +188,17 @@ error: return err; } +static bool vp_is_slow_path_vector(u16 msix_vec) +{ + return msix_vec == VP_MSIX_CONFIG_VECTOR; +} + static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int index, void (*callback)(struct virtqueue *vq), const char *name, bool ctx, - u16 msix_vec) + u16 msix_vec, + struct virtio_pci_vq_info **p_info) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); struct virtio_pci_vq_info *info = kmalloc(sizeof *info, GFP_KERNEL); @@ -194,13 +217,16 @@ static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned int in info->vq = vq; if (callback) { spin_lock_irqsave(&vp_dev->lock, flags); - list_add(&info->node, &vp_dev->virtqueues); + if (!vp_is_slow_path_vector(msix_vec)) + list_add(&info->node, &vp_dev->virtqueues); + else + list_add(&info->node, &vp_dev->slow_virtqueues); spin_unlock_irqrestore(&vp_dev->lock, flags); } else { INIT_LIST_HEAD(&info->node); } - vp_dev->vqs[index] = info; + *p_info = info; return vq; out_info: @@ -236,13 +262,11 @@ void vp_del_vqs(struct virtio_device *vdev) int i; list_for_each_entry_safe(vq, n, &vdev->vqs, list) { - if (vp_dev->is_avq && vp_dev->is_avq(vdev, vq->index)) - continue; - if (vp_dev->per_vq_vectors) { int v = vp_dev->vqs[vq->index]->msix_vector; - if (v != VIRTIO_MSI_NO_VECTOR) { + if (v != VIRTIO_MSI_NO_VECTOR && + !vp_is_slow_path_vector(v)) { int irq = pci_irq_vector(vp_dev->pci_dev, v); irq_update_affinity_hint(irq, NULL); @@ -284,21 +308,85 @@ void vp_del_vqs(struct virtio_device *vdev) vp_dev->vqs = NULL; } +enum vp_vq_vector_policy { + VP_VQ_VECTOR_POLICY_EACH, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, + VP_VQ_VECTOR_POLICY_SHARED, +}; + +static struct virtqueue * +vp_find_one_vq_msix(struct virtio_device *vdev, int queue_idx, + vq_callback_t *callback, const char *name, bool ctx, + bool slow_path, int *allocated_vectors, + enum vp_vq_vector_policy vector_policy, + struct virtio_pci_vq_info **p_info) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtqueue *vq; + u16 msix_vec; + int err; + + if (!callback) + msix_vec = VIRTIO_MSI_NO_VECTOR; + else if (vector_policy == VP_VQ_VECTOR_POLICY_EACH || + (vector_policy == VP_VQ_VECTOR_POLICY_SHARED_SLOW && + !slow_path)) + msix_vec = (*allocated_vectors)++; + else if (vector_policy != VP_VQ_VECTOR_POLICY_EACH && + slow_path) + msix_vec = VP_MSIX_CONFIG_VECTOR; + else + msix_vec = VP_MSIX_VQ_VECTOR; + vq = vp_setup_vq(vdev, queue_idx, callback, name, ctx, msix_vec, + p_info); + if (IS_ERR(vq)) + return vq; + + if (vector_policy == VP_VQ_VECTOR_POLICY_SHARED || + msix_vec == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(msix_vec)) + return vq; + + /* allocate per-vq irq if available and necessary */ + snprintf(vp_dev->msix_names[msix_vec], sizeof(*vp_dev->msix_names), + "%s-%s", dev_name(&vp_dev->vdev.dev), name); + err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), + vring_interrupt, 0, + vp_dev->msix_names[msix_vec], vq); + if (err) { + vp_del_vq(vq); + return ERR_PTR(err); + } + + return vq; +} + static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue *vqs[], struct virtqueue_info vqs_info[], - bool per_vq_vectors, + enum vp_vq_vector_policy vector_policy, struct irq_affinity *desc) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; struct virtqueue_info *vqi; - u16 msix_vec; int i, err, nvectors, allocated_vectors, queue_idx = 0; + struct virtqueue *vq; + bool per_vq_vectors; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto error_find; + } + + per_vq_vectors = vector_policy != VP_VQ_VECTOR_POLICY_SHARED; + if (per_vq_vectors) { /* Best option: one for change interrupt, one per vq. */ nvectors = 1; @@ -307,13 +395,14 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, if (vqi->name && vqi->callback) ++nvectors; } + if (avq_num && vector_policy == VP_VQ_VECTOR_POLICY_EACH) + ++nvectors; } else { /* Second best: one for change, shared for all vqs. */ nvectors = 2; } - err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, - per_vq_vectors ? desc : NULL); + err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors, desc); if (err) goto error_find; @@ -325,37 +414,27 @@ static int vp_find_vqs_msix(struct virtio_device *vdev, unsigned int nvqs, vqs[i] = NULL; continue; } - - if (!vqi->callback) - msix_vec = VIRTIO_MSI_NO_VECTOR; - else if (vp_dev->per_vq_vectors) - msix_vec = allocated_vectors++; - else - msix_vec = VP_MSIX_VQ_VECTOR; - vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, - vqi->name, vqi->ctx, msix_vec); + vqs[i] = vp_find_one_vq_msix(vdev, queue_idx++, vqi->callback, + vqi->name, vqi->ctx, false, + &allocated_vectors, vector_policy, + &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto error_find; } + } - if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR) - continue; - - /* allocate per-vq irq if available and necessary */ - snprintf(vp_dev->msix_names[msix_vec], - sizeof *vp_dev->msix_names, - "%s-%s", - dev_name(&vp_dev->vdev.dev), vqi->name); - err = request_irq(pci_irq_vector(vp_dev->pci_dev, msix_vec), - vring_interrupt, 0, - vp_dev->msix_names[msix_vec], - vqs[i]); - if (err) { - vp_del_vq(vqs[i]); - goto error_find; - } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_find_one_vq_msix(vdev, avq->vq_index, vp_modern_avq_done, + avq->name, false, true, &allocated_vectors, + vector_policy, &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto error_find; } + return 0; error_find: @@ -368,12 +447,21 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, struct virtqueue_info vqs_info[]) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); + struct virtio_pci_admin_vq *avq = &vp_dev->admin_vq; int i, err, queue_idx = 0; + struct virtqueue *vq; + u16 avq_num = 0; vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL); if (!vp_dev->vqs) return -ENOMEM; + if (vp_dev->avq_index) { + err = vp_dev->avq_index(vdev, &avq->vq_index, &avq_num); + if (err) + goto out_del_vqs; + } + err = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED, dev_name(&vdev->dev), vp_dev); if (err) @@ -390,13 +478,24 @@ static int vp_find_vqs_intx(struct virtio_device *vdev, unsigned int nvqs, } vqs[i] = vp_setup_vq(vdev, queue_idx++, vqi->callback, vqi->name, vqi->ctx, - VIRTIO_MSI_NO_VECTOR); + VIRTIO_MSI_NO_VECTOR, &vp_dev->vqs[i]); if (IS_ERR(vqs[i])) { err = PTR_ERR(vqs[i]); goto out_del_vqs; } } + if (!avq_num) + return 0; + sprintf(avq->name, "avq.%u", avq->vq_index); + vq = vp_setup_vq(vdev, queue_idx++, vp_modern_avq_done, avq->name, + false, VIRTIO_MSI_NO_VECTOR, + &vp_dev->admin_vq.info); + if (IS_ERR(vq)) { + err = PTR_ERR(vq); + goto out_del_vqs; + } + return 0; out_del_vqs: vp_del_vqs(vdev); @@ -411,11 +510,20 @@ int vp_find_vqs(struct virtio_device *vdev, unsigned int nvqs, int err; /* Try MSI-X with one vector per queue. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, true, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_EACH, desc); + if (!err) + return 0; + /* Fallback: MSI-X with one shared vector for config and + * slow path queues, one vector per queue for the rest. + */ + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED_SLOW, desc); if (!err) return 0; /* Fallback: MSI-X with one vector for config, one shared for queues. */ - err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, false, desc); + err = vp_find_vqs_msix(vdev, nvqs, vqs, vqs_info, + VP_VQ_VECTOR_POLICY_SHARED, desc); if (!err) return 0; /* Is there an interrupt? If not give up. */ @@ -466,7 +574,8 @@ const struct cpumask *vp_get_vq_affinity(struct virtio_device *vdev, int index) struct virtio_pci_device *vp_dev = to_vp_device(vdev); if (!vp_dev->per_vq_vectors || - vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR) + vp_dev->vqs[index]->msix_vector == VIRTIO_MSI_NO_VECTOR || + vp_is_slow_path_vector(vp_dev->vqs[index]->msix_vector)) return NULL; return pci_irq_get_affinity(vp_dev->pci_dev, @@ -574,6 +683,7 @@ static int virtio_pci_probe(struct pci_dev *pci_dev, vp_dev->vdev.dev.release = virtio_pci_release_dev; vp_dev->pci_dev = pci_dev; INIT_LIST_HEAD(&vp_dev->virtqueues); + INIT_LIST_HEAD(&vp_dev->slow_virtqueues); spin_lock_init(&vp_dev->lock); /* enable the device */ diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h index 3c4bb2d6163a92f918923a8cef65a901fb3c7e1f..1d9c49947f52d1b6e46057bbf9f0f71216f307b5 100644 --- a/drivers/virtio/virtio_pci_common.h +++ b/drivers/virtio/virtio_pci_common.h @@ -35,7 +35,7 @@ struct virtio_pci_vq_info { /* the actual virtqueue */ struct virtqueue *vq; - /* the list node for the virtqueues list */ + /* the list node for the virtqueues or slow_virtqueues list */ struct list_head node; /* MSI-X vector (or none) */ @@ -44,9 +44,9 @@ struct virtio_pci_vq_info { struct virtio_pci_admin_vq { /* Virtqueue info associated with this admin queue. */ - struct virtio_pci_vq_info info; - /* serializing admin commands execution and virtqueue deletion */ - struct mutex cmd_lock; + struct virtio_pci_vq_info *info; + /* Protects virtqueue access. */ + spinlock_t lock; u64 supported_cmds; /* Name of the admin queue: avq.$vq_index. */ char name[10]; @@ -66,9 +66,12 @@ struct virtio_pci_device { /* Where to read and clear interrupt */ u8 __iomem *isr; - /* a list of queues so we can dispatch IRQs */ + /* Lists of queues and potentially slow path queues + * so we can dispatch IRQs. + */ spinlock_t lock; struct list_head virtqueues; + struct list_head slow_virtqueues; /* Array of all virtqueues reported in the * PCI common config num_queues field @@ -102,7 +105,7 @@ struct virtio_pci_device { void (*del_vq)(struct virtio_pci_vq_info *info); u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector); - bool (*is_avq)(struct virtio_device *vdev, unsigned int index); + int (*avq_index)(struct virtio_device *vdev, u16 *index, u16 *num); }; /* Constants for MSI-X */ @@ -175,6 +178,7 @@ struct virtio_device *virtio_pci_vf_get_pf_dev(struct pci_dev *pdev); #define VIRTIO_ADMIN_CMD_BITMAP 0 #endif +void vp_modern_avq_done(struct virtqueue *vq); int vp_modern_admin_cmd_exec(struct virtio_device *vdev, struct virtio_admin_cmd *cmd); diff --git a/drivers/virtio/virtio_pci_modern.c b/drivers/virtio/virtio_pci_modern.c index 3b5b9499a53a658be513cdbe6ebc26805d699f44..9193c30d640aeb91b6bcd6bccc97bedda2418239 100644 --- a/drivers/virtio/virtio_pci_modern.c +++ b/drivers/virtio/virtio_pci_modern.c @@ -28,6 +28,21 @@ static u64 vp_get_features(struct virtio_device *vdev) return vp_modern_get_features(&vp_dev->mdev); } +static int vp_avq_index(struct virtio_device *vdev, u16 *index, u16 *num) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vdev); + + *num = 0; + if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) + return 0; + + *num = vp_modern_avq_num(&vp_dev->mdev); + if (!(*num)) + return -EINVAL; + *index = vp_modern_avq_index(&vp_dev->mdev); + return 0; +} + static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); @@ -38,17 +53,35 @@ static bool vp_is_avq(struct virtio_device *vdev, unsigned int index) return index == vp_dev->admin_vq.vq_index; } +void vp_modern_avq_done(struct virtqueue *vq) +{ + struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); + struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + unsigned long flags; + unsigned int len; + + spin_lock_irqsave(&admin_vq->lock, flags); + do { + virtqueue_disable_cb(vq); + while ((cmd = virtqueue_get_buf(vq, &len))) + complete(&cmd->completion); + } while (!virtqueue_enable_cb(vq)); + spin_unlock_irqrestore(&admin_vq->lock, flags); +} + static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, u16 opcode, struct scatterlist **sgs, unsigned int out_num, unsigned int in_num, - void *data) + struct virtio_admin_cmd *cmd) { struct virtqueue *vq; - int ret, len; + unsigned long flags; + int ret; - vq = admin_vq->info.vq; + vq = admin_vq->info->vq; if (!vq) return -EIO; @@ -57,21 +90,33 @@ static int virtqueue_exec_admin_cmd(struct virtio_pci_admin_vq *admin_vq, !((1ULL << opcode) & admin_vq->supported_cmds)) return -EOPNOTSUPP; - ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, data, GFP_KERNEL); - if (ret < 0) - return -EIO; + init_completion(&cmd->completion); - if (unlikely(!virtqueue_kick(vq))) +again: + if (virtqueue_is_broken(vq)) return -EIO; - while (!virtqueue_get_buf(vq, &len) && - !virtqueue_is_broken(vq)) - cpu_relax(); + spin_lock_irqsave(&admin_vq->lock, flags); + ret = virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, GFP_KERNEL); + if (ret < 0) { + if (ret == -ENOSPC) { + spin_unlock_irqrestore(&admin_vq->lock, flags); + cpu_relax(); + goto again; + } + goto unlock_err; + } + if (!virtqueue_kick(vq)) + goto unlock_err; + spin_unlock_irqrestore(&admin_vq->lock, flags); - if (virtqueue_is_broken(vq)) - return -EIO; + wait_for_completion(&cmd->completion); - return 0; + return cmd->ret; + +unlock_err: + spin_unlock_irqrestore(&admin_vq->lock, flags); + return -EIO; } int vp_modern_admin_cmd_exec(struct virtio_device *vdev, @@ -122,12 +167,9 @@ int vp_modern_admin_cmd_exec(struct virtio_device *vdev, in_num++; } - mutex_lock(&vp_dev->admin_vq.cmd_lock); ret = virtqueue_exec_admin_cmd(&vp_dev->admin_vq, le16_to_cpu(cmd->opcode), - sgs, out_num, in_num, sgs); - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - + sgs, out_num, in_num, cmd); if (ret) { dev_err(&vdev->dev, "Failed to execute command on admin vq: %d\n.", ret); @@ -188,25 +230,29 @@ end: static void vp_modern_avq_activate(struct virtio_device *vdev) { - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_unbreak(admin_vq->info.vq); virtio_pci_admin_cmd_list_init(vdev); } -static void vp_modern_avq_deactivate(struct virtio_device *vdev) +static void vp_modern_avq_cleanup(struct virtio_device *vdev) { struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *admin_vq = &vp_dev->admin_vq; + struct virtio_admin_cmd *cmd; + struct virtqueue *vq; if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) return; - __virtqueue_break(admin_vq->info.vq); + vq = vp_dev->vqs[vp_dev->admin_vq.vq_index]->vq; + if (!vq) + return; + + while ((cmd = virtqueue_detach_unused_buf(vq))) { + cmd->ret = -EIO; + complete(&cmd->completion); + } } static void vp_transport_features(struct virtio_device *vdev, u64 features) @@ -403,7 +449,7 @@ static void vp_reset(struct virtio_device *vdev) while (vp_modern_get_status(mdev)) msleep(1); - vp_modern_avq_deactivate(vdev); + vp_modern_avq_cleanup(vdev); /* Flush pending VQ/configuration callbacks. */ vp_synchronize_vectors(vdev); @@ -552,8 +598,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, if (index >= vp_modern_get_num_queues(mdev) && !is_avq) return ERR_PTR(-EINVAL); - num = is_avq ? - VIRTIO_AVQ_SGS_MAX : vp_modern_get_queue_size(mdev, index); + num = vp_modern_get_queue_size(mdev, index); /* Check if queue is either not available or already active. */ if (!num || vp_modern_get_queue_enable(mdev, index)) return ERR_PTR(-ENOENT); @@ -580,12 +625,6 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev, goto err; } - if (is_avq) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = vq; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - return vq; err: @@ -620,12 +659,6 @@ static void del_vq(struct virtio_pci_vq_info *info) struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev); struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - if (vp_is_avq(&vp_dev->vdev, vq->index)) { - mutex_lock(&vp_dev->admin_vq.cmd_lock); - vp_dev->admin_vq.info.vq = NULL; - mutex_unlock(&vp_dev->admin_vq.cmd_lock); - } - if (vp_dev->msix_enabled) vp_modern_queue_vector(mdev, vq->index, VIRTIO_MSI_NO_VECTOR); @@ -735,45 +768,6 @@ static bool vp_get_shm_region(struct virtio_device *vdev, return true; } -static int vp_modern_create_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - struct virtio_pci_admin_vq *avq; - struct virtqueue *vq; - u16 admin_q_num; - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return 0; - - admin_q_num = vp_modern_avq_num(&vp_dev->mdev); - if (!admin_q_num) - return -EINVAL; - - avq = &vp_dev->admin_vq; - avq->vq_index = vp_modern_avq_index(&vp_dev->mdev); - sprintf(avq->name, "avq.%u", avq->vq_index); - vq = vp_dev->setup_vq(vp_dev, &vp_dev->admin_vq.info, avq->vq_index, NULL, - avq->name, NULL, VIRTIO_MSI_NO_VECTOR); - if (IS_ERR(vq)) { - dev_err(&vdev->dev, "failed to setup admin virtqueue, err=%ld", - PTR_ERR(vq)); - return PTR_ERR(vq); - } - - vp_modern_set_queue_enable(&vp_dev->mdev, avq->info.vq->index, true); - return 0; -} - -static void vp_modern_destroy_avq(struct virtio_device *vdev) -{ - struct virtio_pci_device *vp_dev = to_vp_device(vdev); - - if (!virtio_has_feature(vdev, VIRTIO_F_ADMIN_VQ)) - return; - - vp_dev->del_vq(&vp_dev->admin_vq.info); -} - static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get = NULL, .set = NULL, @@ -792,8 +786,6 @@ static const struct virtio_config_ops virtio_pci_config_nodev_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; static const struct virtio_config_ops virtio_pci_config_ops = { @@ -814,8 +806,6 @@ static const struct virtio_config_ops virtio_pci_config_ops = { .get_shm_region = vp_get_shm_region, .disable_vq_and_reset = vp_modern_disable_vq_and_reset, .enable_vq_after_reset = vp_modern_enable_vq_after_reset, - .create_avq = vp_modern_create_avq, - .destroy_avq = vp_modern_destroy_avq, }; /* the PCI probing function */ @@ -839,11 +829,11 @@ int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev) vp_dev->config_vector = vp_config_vector; vp_dev->setup_vq = setup_vq; vp_dev->del_vq = del_vq; - vp_dev->is_avq = vp_is_avq; + vp_dev->avq_index = vp_avq_index; vp_dev->isr = mdev->isr; vp_dev->vdev.id = mdev->id; - mutex_init(&vp_dev->admin_vq.cmd_lock); + spin_lock_init(&vp_dev->admin_vq.lock); return 0; } @@ -851,6 +841,5 @@ void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev) { struct virtio_pci_modern_device *mdev = &vp_dev->mdev; - mutex_destroy(&vp_dev->admin_vq.cmd_lock); vp_modern_remove(mdev); } diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index a97ceb105cd8dd02060afa539acd8ee6749357cb..24fdc74caeba47b0ba5d782f4a1e68175c00256e 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -75,7 +75,8 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, err ?: total, false); } diff --git a/fs/afs/file.c b/fs/afs/file.c index c3f0c45ae9a9b6f6582accd6e9365862808fb7e1..ec1be0091fdb562d49a84dc14c2330c7265e9a65 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -242,7 +242,8 @@ static void afs_fetch_data_notify(struct afs_operation *op) req->error = error; if (subreq) { - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); netfs_subreq_terminated(subreq, error ?: req->actual_len, false); req->subreq = NULL; } else if (req->done) { diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index a7b425d3c8a0e4f068fde469f2347b4a5edbb449..331a17f3f113b48630744591572d8f9e5feb04c7 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -272,16 +272,19 @@ bch2_acl_to_xattr(struct btree_trans *trans, return xattr; } -struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap, - struct dentry *dentry, int type) +struct posix_acl *bch2_get_acl(struct inode *vinode, int type, bool rcu) { - struct bch_inode_info *inode = to_bch_ei(dentry->d_inode); + struct bch_inode_info *inode = to_bch_ei(vinode); struct bch_fs *c = inode->v.i_sb->s_fs_info; struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode); struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0); - struct btree_trans *trans = bch2_trans_get(c); struct btree_iter iter = { NULL }; struct posix_acl *acl = NULL; + + if (rcu) + return ERR_PTR(-ECHILD); + + struct btree_trans *trans = bch2_trans_get(c); retry: bch2_trans_begin(trans); diff --git a/fs/bcachefs/acl.h b/fs/bcachefs/acl.h index 27e7eec0f278c63784ec9520ffc31c7fad8cd7eb..fe730a6bf0c18c00c5c3db93ede033af5a63d62c 100644 --- a/fs/bcachefs/acl.h +++ b/fs/bcachefs/acl.h @@ -28,7 +28,7 @@ void bch2_acl_to_text(struct printbuf *, const void *, size_t); #ifdef CONFIG_BCACHEFS_POSIX_ACL -struct posix_acl *bch2_get_acl(struct mnt_idmap *, struct dentry *, int); +struct posix_acl *bch2_get_acl(struct inode *, int, bool); int bch2_set_acl_trans(struct btree_trans *, subvol_inum, struct bch_inode_unpacked *, diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9c5a92fa708fb32f00f312c1f4f7301eb9810a2..ba46f1c1d78aad32650a7735632530944d4e98ad 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -196,121 +196,119 @@ static unsigned bch_alloc_v1_val_u64s(const struct bch_alloc *a) return DIV_ROUND_UP(bytes, sizeof(u64)); } -int bch2_alloc_v1_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v1_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); int ret = 0; /* allow for unknown fields */ - bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), c, err, - alloc_v1_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(a.k) < bch_alloc_v1_val_u64s(a.v), + c, alloc_v1_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(a.k), bch_alloc_v1_val_u64s(a.v)); fsck_err: return ret; } -int bch2_alloc_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v2(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_alloc_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_alloc_unpacked u; int ret = 0; - bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), c, err, - alloc_v2_unpack_error, + bkey_fsck_err_on(bch2_alloc_unpack_v3(&u, k), + c, alloc_v2_unpack_error, "unpack error"); fsck_err: return ret; } -int bch2_alloc_v4_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_alloc_v4_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - struct bkey_s_c_alloc_v4 a = bkey_s_c_to_alloc_v4(k); + struct bch_alloc_v4 a; int ret = 0; - bkey_fsck_err_on(alloc_v4_u64s_noerror(a.v) > bkey_val_u64s(k.k), c, err, - alloc_v4_val_size_bad, + bkey_val_copy(&a, bkey_s_c_to_alloc_v4(k)); + + bkey_fsck_err_on(alloc_v4_u64s_noerror(&a) > bkey_val_u64s(k.k), + c, alloc_v4_val_size_bad, "bad val size (%u > %zu)", - alloc_v4_u64s_noerror(a.v), bkey_val_u64s(k.k)); + alloc_v4_u64s_noerror(&a), bkey_val_u64s(k.k)); - bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(a.v) && - BCH_ALLOC_V4_NR_BACKPOINTERS(a.v), c, err, - alloc_v4_backpointers_start_bad, + bkey_fsck_err_on(!BCH_ALLOC_V4_BACKPOINTERS_START(&a) && + BCH_ALLOC_V4_NR_BACKPOINTERS(&a), + c, alloc_v4_backpointers_start_bad, "invalid backpointers_start"); - bkey_fsck_err_on(alloc_data_type(*a.v, a.v->data_type) != a.v->data_type, c, err, - alloc_key_data_type_bad, + bkey_fsck_err_on(alloc_data_type(a, a.data_type) != a.data_type, + c, alloc_key_data_type_bad, "invalid data type (got %u should be %u)", - a.v->data_type, alloc_data_type(*a.v, a.v->data_type)); + a.data_type, alloc_data_type(a, a.data_type)); for (unsigned i = 0; i < 2; i++) - bkey_fsck_err_on(a.v->io_time[i] > LRU_TIME_MAX, - c, err, - alloc_key_io_time_bad, + bkey_fsck_err_on(a.io_time[i] > LRU_TIME_MAX, + c, alloc_key_io_time_bad, "invalid io_time[%s]: %llu, max %llu", i == READ ? "read" : "write", - a.v->io_time[i], LRU_TIME_MAX); + a.io_time[i], LRU_TIME_MAX); - unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(a.v) * sizeof(u64) > + unsigned stripe_sectors = BCH_ALLOC_V4_BACKPOINTERS_START(&a) * sizeof(u64) > offsetof(struct bch_alloc_v4, stripe_sectors) - ? a.v->stripe_sectors + ? a.stripe_sectors : 0; - switch (a.v->data_type) { + switch (a.data_type) { case BCH_DATA_free: case BCH_DATA_need_gc_gens: case BCH_DATA_need_discard: bkey_fsck_err_on(stripe_sectors || - a.v->dirty_sectors || - a.v->cached_sectors || - a.v->stripe, - c, err, alloc_key_empty_but_have_data, + a.dirty_sectors || + a.cached_sectors || + a.stripe, + c, alloc_key_empty_but_have_data, "empty data type free but have data %u.%u.%u %u", stripe_sectors, - a.v->dirty_sectors, - a.v->cached_sectors, - a.v->stripe); + a.dirty_sectors, + a.cached_sectors, + a.stripe); break; case BCH_DATA_sb: case BCH_DATA_journal: case BCH_DATA_btree: case BCH_DATA_user: case BCH_DATA_parity: - bkey_fsck_err_on(!a.v->dirty_sectors && + bkey_fsck_err_on(!a.dirty_sectors && !stripe_sectors, - c, err, alloc_key_dirty_sectors_0, + c, alloc_key_dirty_sectors_0, "data_type %s but dirty_sectors==0", - bch2_data_type_str(a.v->data_type)); + bch2_data_type_str(a.data_type)); break; case BCH_DATA_cached: - bkey_fsck_err_on(!a.v->cached_sectors || - a.v->dirty_sectors || + bkey_fsck_err_on(!a.cached_sectors || + a.dirty_sectors || stripe_sectors || - a.v->stripe, - c, err, alloc_key_cached_inconsistency, + a.stripe, + c, alloc_key_cached_inconsistency, "data type inconsistency"); - bkey_fsck_err_on(!a.v->io_time[READ] && + bkey_fsck_err_on(!a.io_time[READ] && c->curr_recovery_pass > BCH_RECOVERY_PASS_check_alloc_to_lru_refs, - c, err, alloc_key_cached_but_read_time_zero, + c, alloc_key_cached_but_read_time_zero, "cached bucket with read_time == 0"); break; case BCH_DATA_stripe: @@ -513,14 +511,13 @@ static unsigned alloc_gen(struct bkey_s_c k, unsigned offset) : 0; } -int bch2_bucket_gens_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bucket_gens_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), c, err, - bucket_gens_val_size_bad, + bkey_fsck_err_on(bkey_val_bytes(k.k) != sizeof(struct bch_bucket_gens), + c, bucket_gens_val_size_bad, "bad val size (%zu != %zu)", bkey_val_bytes(k.k), sizeof(struct bch_bucket_gens)); fsck_err: @@ -561,7 +558,7 @@ int bch2_bucket_gens_init(struct bch_fs *c) struct bpos pos = alloc_gens_pos(iter.pos, &offset); int ret2 = 0; - if (have_bucket_gens_key && bkey_cmp(iter.pos, pos)) { + if (have_bucket_gens_key && !bkey_eq(g.k.p, pos)) { ret2 = bch2_btree_insert_trans(trans, BTREE_ID_bucket_gens, &g.k_i, 0) ?: bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret2) @@ -829,7 +826,19 @@ int bch2_trigger_alloc(struct btree_trans *trans, struct bch_alloc_v4 old_a_convert; const struct bch_alloc_v4 *old_a = bch2_alloc_to_v4(old, &old_a_convert); - struct bch_alloc_v4 *new_a = bkey_s_to_alloc_v4(new).v; + + struct bch_alloc_v4 *new_a; + if (likely(new.k->type == KEY_TYPE_alloc_v4)) { + new_a = bkey_s_to_alloc_v4(new).v; + } else { + BUG_ON(!(flags & (BTREE_TRIGGER_gc|BTREE_TRIGGER_check_repair))); + + struct bkey_i_alloc_v4 *new_ka = bch2_alloc_to_v4_mut_inlined(trans, new.s_c); + ret = PTR_ERR_OR_ZERO(new_ka); + if (unlikely(ret)) + goto err; + new_a = &new_ka->v; + } if (flags & BTREE_TRIGGER_transactional) { alloc_data_type_set(new_a, new_a->data_type); @@ -1865,26 +1874,26 @@ static void bch2_do_discards_work(struct work_struct *work) trace_discard_buckets(c, s.seen, s.open, s.need_journal_commit, s.discarded, bch2_err_str(ret)); - bch2_write_ref_put(c, BCH_WRITE_REF_discard); percpu_ref_put(&ca->io_ref); + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_dev_do_discards(struct bch_dev *ca) { struct bch_fs *c = ca->fs; - if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) return; - if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard)) - goto put_ioref; + if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE)) + goto put_write_ref; if (queue_work(c->write_ref_wq, &ca->discard_work)) return; - bch2_write_ref_put(c, BCH_WRITE_REF_discard); -put_ioref: percpu_ref_put(&ca->io_ref); +put_write_ref: + bch2_write_ref_put(c, BCH_WRITE_REF_discard); } void bch2_do_discards(struct bch_fs *c) diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 8d2b62c9588e738364da6aab03000f271604fd5a..fd790b03fbe1987af43d01e9ad210ec3d94a8e3e 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -82,6 +82,14 @@ static inline bool bucket_data_type_mismatch(enum bch_data_type bucket, bucket_data_type(bucket) != bucket_data_type(ptr); } +/* + * It is my general preference to use unsigned types for unsigned quantities - + * however, these helpers are used in disk accounting calculations run by + * triggers where the output will be negated and added to an s64. unsigned is + * right out even though all these quantities will fit in 32 bits, since it + * won't be sign extended correctly; u64 will negate "correctly", but s64 is the + * simpler option here. + */ static inline s64 bch2_bucket_sectors_total(struct bch_alloc_v4 a) { return a.stripe_sectors + a.dirty_sectors + a.cached_sectors; @@ -142,7 +150,9 @@ static inline void alloc_data_type_set(struct bch_alloc_v4 *a, enum bch_data_typ static inline u64 alloc_lru_idx_read(struct bch_alloc_v4 a) { - return a.data_type == BCH_DATA_cached ? a.io_time[READ] : 0; + return a.data_type == BCH_DATA_cached + ? a.io_time[READ] & LRU_TIME_MAX + : 0; } #define DATA_TYPES_MOVABLE \ @@ -166,8 +176,8 @@ static inline u64 alloc_lru_idx_fragmentation(struct bch_alloc_v4 a, * avoid overflowing LRU_TIME_BITS on a corrupted fs, when * bucket_sectors_dirty is (much) bigger than bucket_size */ - u64 d = min(bch2_bucket_sectors_dirty(a), - ca->mi.bucket_size); + u64 d = min_t(s64, bch2_bucket_sectors_dirty(a), + ca->mi.bucket_size); return div_u64(d * (1ULL << 31), ca->mi.bucket_size); } @@ -232,52 +242,48 @@ struct bkey_i_alloc_v4 *bch2_alloc_to_v4_mut(struct btree_trans *, struct bkey_s int bch2_bucket_io_time_reset(struct btree_trans *, unsigned, size_t, int); -int bch2_alloc_v1_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_alloc_v4_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_alloc_v1_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v2_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v3_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int bch2_alloc_v4_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_alloc_v4_swab(struct bkey_s); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_alloc ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v1_invalid, \ + .key_validate = bch2_alloc_v1_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v2_invalid, \ + .key_validate = bch2_alloc_v2_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_alloc_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v3_invalid, \ + .key_validate = bch2_alloc_v3_validate, \ .val_to_text = bch2_alloc_to_text, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_alloc_v4 ((struct bkey_ops) { \ - .key_invalid = bch2_alloc_v4_invalid, \ + .key_validate = bch2_alloc_v4_validate, \ .val_to_text = bch2_alloc_to_text, \ .swab = bch2_alloc_v4_swab, \ .trigger = bch2_trigger_alloc, \ .min_val_size = 48, \ }) -int bch2_bucket_gens_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bucket_gens_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bucket_gens_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_bucket_gens ((struct bkey_ops) { \ - .key_invalid = bch2_bucket_gens_invalid, \ + .key_validate = bch2_bucket_gens_validate, \ .val_to_text = bch2_bucket_gens_to_text, \ }) diff --git a/fs/bcachefs/alloc_background_format.h b/fs/bcachefs/alloc_background_format.h index 47d9d006502cb1b335daa68238fb7625c8765bbc..f754a2951d8aab538aa92a64b04b00f8ce4ec8a4 100644 --- a/fs/bcachefs/alloc_background_format.h +++ b/fs/bcachefs/alloc_background_format.h @@ -69,6 +69,7 @@ struct bch_alloc_v4 { __u64 io_time[2]; __u32 stripe; __u32 nr_external_backpointers; + /* end of fields in original version of alloc_v4 */ __u64 fragmentation_lru; __u32 stripe_sectors; __u32 pad; diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 618d2ff0292ec397813c15cd2fa9f5c0fb2ced91..8563c2d268476e7f9f66457dfa62bbdb67747511 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -1603,7 +1603,8 @@ void bch2_open_bucket_to_text(struct printbuf *out, struct bch_fs *c, struct ope prt_newline(out); } -void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) +void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c, + struct bch_dev *ca) { struct open_bucket *ob; @@ -1613,7 +1614,8 @@ void bch2_open_buckets_to_text(struct printbuf *out, struct bch_fs *c) ob < c->open_buckets + ARRAY_SIZE(c->open_buckets); ob++) { spin_lock(&ob->lock); - if (ob->valid && !ob->on_partial_list) + if (ob->valid && !ob->on_partial_list && + (!ca || ob->dev == ca->dev_idx)) bch2_open_bucket_to_text(out, c, ob); spin_unlock(&ob->lock); } @@ -1738,7 +1740,7 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) printbuf_tabstop_push(out, 16); printbuf_tabstop_push(out, 16); - bch2_dev_usage_to_text(out, &stats); + bch2_dev_usage_to_text(out, ca, &stats); prt_newline(out); @@ -1756,11 +1758,12 @@ void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca) prt_printf(out, "buckets to invalidate\t%llu\r\n", should_invalidate_buckets(ca, stats)); } -void bch2_print_allocator_stuck(struct bch_fs *c) +static noinline void bch2_print_allocator_stuck(struct bch_fs *c) { struct printbuf buf = PRINTBUF; - prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n"); + prt_printf(&buf, "Allocator stuck? Waited for %u seconds\n", + c->opts.allocator_stuck_timeout); prt_printf(&buf, "Allocator debug:\n"); printbuf_indent_add(&buf, 2); @@ -1790,3 +1793,24 @@ void bch2_print_allocator_stuck(struct bch_fs *c) bch2_print_string_as_lines(KERN_ERR, buf.buf); printbuf_exit(&buf); } + +static inline unsigned allocator_wait_timeout(struct bch_fs *c) +{ + if (c->allocator_last_stuck && + time_after(c->allocator_last_stuck + HZ * 60 * 2, jiffies)) + return 0; + + return c->opts.allocator_stuck_timeout * HZ; +} + +void __bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + unsigned t = allocator_wait_timeout(c); + + if (t && closure_sync_timeout(cl, t)) { + c->allocator_last_stuck = jiffies; + bch2_print_allocator_stuck(c); + } + + closure_sync(cl); +} diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index 6da9e7e29026f20a6a43f251dfcc042bb67f3596..386d231ceca3f667f8871d32e540ca47c1fafc44 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -223,7 +223,7 @@ static inline struct write_point_specifier writepoint_ptr(struct write_point *wp void bch2_fs_allocator_foreground_init(struct bch_fs *); void bch2_open_bucket_to_text(struct printbuf *, struct bch_fs *, struct open_bucket *); -void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *); +void bch2_open_buckets_to_text(struct printbuf *, struct bch_fs *, struct bch_dev *); void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *); void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); @@ -231,6 +231,11 @@ void bch2_write_points_to_text(struct printbuf *, struct bch_fs *); void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *); void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *); -void bch2_print_allocator_stuck(struct bch_fs *); +void __bch2_wait_on_allocator(struct bch_fs *, struct closure *); +static inline void bch2_wait_on_allocator(struct bch_fs *c, struct closure *cl) +{ + if (cl->closure_get_happened) + __bch2_wait_on_allocator(c, cl); +} #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */ diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c index 3cc02479a9828e81ad787132b364c74484700000..d4da6343efa9bd028a9aca93223b6d261105a9fd 100644 --- a/fs/bcachefs/backpointers.c +++ b/fs/bcachefs/backpointers.c @@ -47,9 +47,8 @@ static bool extent_matches_bp(struct bch_fs *c, return false; } -int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); @@ -68,8 +67,7 @@ int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || !bpos_eq(bp.k->p, bp_pos), - c, err, - backpointer_bucket_offset_wrong, + c, backpointer_bucket_offset_wrong, "backpointer bucket_offset wrong"); fsck_err: return ret; @@ -763,27 +761,22 @@ static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, btree < BTREE_ID_NR && !ret; btree++) { unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; - struct btree_iter iter; - struct btree *b; if (!(BIT_ULL(btree) & btree_leaf_mask) && !(BIT_ULL(btree) & btree_interior_mask)) continue; - bch2_trans_begin(trans); - - __for_each_btree_node(trans, iter, btree, + ret = __for_each_btree_node(trans, iter, btree, btree == start.btree ? start.pos : POS_MIN, - 0, depth, BTREE_ITER_prefetch, b, ret) { + 0, depth, BTREE_ITER_prefetch, b, ({ mem_may_pin -= btree_buf_bytes(b); if (mem_may_pin <= 0) { c->btree_cache.pinned_nodes_end = *end = BBPOS(btree, b->key.k.p); - bch2_trans_iter_exit(trans, &iter); - return 0; + break; } - } - bch2_trans_iter_exit(trans, &iter); + 0; + })); } return ret; diff --git a/fs/bcachefs/backpointers.h b/fs/bcachefs/backpointers.h index 6021de1c5e98de9879c04564db4865c8a2b1748b..7daecadb764e3eb9afa2a3e6862ac063a156c21b 100644 --- a/fs/bcachefs/backpointers.h +++ b/fs/bcachefs/backpointers.h @@ -18,14 +18,13 @@ static inline u64 swab40(u64 x) ((x & 0xff00000000ULL) >> 32)); } -int bch2_backpointer_invalid(struct bch_fs *, struct bkey_s_c k, - enum bch_validate_flags, struct printbuf *); +int bch2_backpointer_validate(struct bch_fs *, struct bkey_s_c k, enum bch_validate_flags); void bch2_backpointer_to_text(struct printbuf *, const struct bch_backpointer *); void bch2_backpointer_k_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_backpointer_swab(struct bkey_s); #define bch2_bkey_ops_backpointer ((struct bkey_ops) { \ - .key_invalid = bch2_backpointer_invalid, \ + .key_validate = bch2_backpointer_validate, \ .val_to_text = bch2_backpointer_k_to_text, \ .swab = bch2_backpointer_swab, \ .min_val_size = 32, \ diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 91361a167dcd6329ed10ac7c791311d7412f21db..0c7086e00d18f55cb2cc19e6b598f1782a72d255 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -447,6 +447,7 @@ BCH_DEBUG_PARAMS_DEBUG() x(blocked_journal_low_on_space) \ x(blocked_journal_low_on_pin) \ x(blocked_journal_max_in_flight) \ + x(blocked_key_cache_flush) \ x(blocked_allocate) \ x(blocked_allocate_open_bucket) \ x(blocked_write_buffer_full) \ @@ -893,6 +894,8 @@ struct bch_fs { struct bch_fs_usage_base __percpu *usage; u64 __percpu *online_reserved; + unsigned long allocator_last_stuck; + struct io_clock io_clock[2]; /* JOURNAL SEQ BLACKLIST */ diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 74a60b1a4ddfa70f7353b7c99584b0a216d4b7c3..14ce726bf5a3cce27152e31c82e7a976bfb6005a 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -675,7 +675,10 @@ struct bch_sb_field_ext { x(btree_subvolume_children, BCH_VERSION(1, 6)) \ x(mi_btree_bitmap, BCH_VERSION(1, 7)) \ x(bucket_stripe_sectors, BCH_VERSION(1, 8)) \ - x(disk_accounting_v2, BCH_VERSION(1, 9)) + x(disk_accounting_v2, BCH_VERSION(1, 9)) \ + x(disk_accounting_v3, BCH_VERSION(1, 10)) \ + x(disk_accounting_inum, BCH_VERSION(1, 11)) \ + x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) enum bcachefs_metadata_version { bcachefs_metadata_version_min = 9, @@ -836,6 +839,8 @@ LE64_BITMASK(BCH_SB_BACKGROUND_COMPRESSION_TYPE_HI, LE64_BITMASK(BCH_SB_VERSION_UPGRADE_COMPLETE, struct bch_sb, flags[5], 0, 16); +LE64_BITMASK(BCH_SB_ALLOCATOR_STUCK_TIMEOUT, + struct bch_sb, flags[5], 16, 32); static inline __u64 BCH_SB_COMPRESSION_TYPE(const struct bch_sb *sb) { diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 936357149cf0fb580dac9431d8bebfb89850bb78..e34cb2bf329c54f4b297a6f4493b4f992a989c04 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -10,9 +10,10 @@ #include "vstructs.h" enum bch_validate_flags { - BCH_VALIDATE_write = (1U << 0), - BCH_VALIDATE_commit = (1U << 1), - BCH_VALIDATE_journal = (1U << 2), + BCH_VALIDATE_write = BIT(0), + BCH_VALIDATE_commit = BIT(1), + BCH_VALIDATE_journal = BIT(2), + BCH_VALIDATE_silent = BIT(3), }; #if 0 diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5f07cf853d0c75622c2bb22957da5d3034090c75..88d8958281e80f8e8840917ab3f43e0f4a7684ef 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -27,27 +27,27 @@ const char * const bch2_bkey_types[] = { NULL }; -static int deleted_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int deleted_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } #define bch2_bkey_ops_deleted ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) #define bch2_bkey_ops_whiteout ((struct bkey_ops) { \ - .key_invalid = deleted_key_invalid, \ + .key_validate = deleted_key_validate, \ }) -static int empty_val_key_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int empty_val_key_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k), c, err, - bkey_val_size_nonzero, + bkey_fsck_err_on(bkey_val_bytes(k.k), + c, bkey_val_size_nonzero, "incorrect value size (%zu != 0)", bkey_val_bytes(k.k)); fsck_err: @@ -55,11 +55,11 @@ fsck_err: } #define bch2_bkey_ops_error ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_cookie_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_cookie_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -73,17 +73,17 @@ static void key_type_cookie_to_text(struct printbuf *out, struct bch_fs *c, } #define bch2_bkey_ops_cookie ((struct bkey_ops) { \ - .key_invalid = key_type_cookie_invalid, \ + .key_validate = key_type_cookie_validate, \ .val_to_text = key_type_cookie_to_text, \ .min_val_size = 8, \ }) #define bch2_bkey_ops_hash_whiteout ((struct bkey_ops) {\ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ }) -static int key_type_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +static int key_type_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } @@ -98,9 +98,9 @@ static void key_type_inline_data_to_text(struct printbuf *out, struct bch_fs *c, datalen, min(datalen, 32U), d.v->data); } -#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ - .key_invalid = key_type_inline_data_invalid, \ - .val_to_text = key_type_inline_data_to_text, \ +#define bch2_bkey_ops_inline_data ((struct bkey_ops) { \ + .key_validate = key_type_inline_data_validate, \ + .val_to_text = key_type_inline_data_to_text, \ }) static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) @@ -110,7 +110,7 @@ static bool key_type_set_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_ } #define bch2_bkey_ops_set ((struct bkey_ops) { \ - .key_invalid = empty_val_key_invalid, \ + .key_validate = empty_val_key_validate, \ .key_merge = key_type_set_merge, \ }) @@ -123,9 +123,8 @@ const struct bkey_ops bch2_bkey_ops[] = { const struct bkey_ops bch2_bkey_null_ops = { }; -int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_val_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; @@ -133,15 +132,15 @@ int bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k, const struct bkey_ops *ops = bch2_bkey_type_ops(k.k->type); int ret = 0; - bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, c, err, - bkey_val_size_too_small, + bkey_fsck_err_on(bkey_val_bytes(k.k) < ops->min_val_size, + c, bkey_val_size_too_small, "bad val size (%zu < %u)", bkey_val_bytes(k.k), ops->min_val_size); - if (!ops->key_invalid) + if (!ops->key_validate) return 0; - ret = ops->key_invalid(c, k, flags, err); + ret = ops->key_validate(c, k, flags); fsck_err: return ret; } @@ -161,18 +160,17 @@ const char *bch2_btree_node_type_str(enum btree_node_type type) return type == BKEY_TYPE_btree ? "internal btree node" : bch2_btree_id_str(type - 1); } -int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, - enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) +int __bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type, + enum bch_validate_flags flags) { if (test_bit(BCH_FS_no_invalid_checks, &c->flags)) return 0; int ret = 0; - bkey_fsck_err_on(k.k->u64s < BKEY_U64s, c, err, - bkey_u64s_too_small, + bkey_fsck_err_on(k.k->u64s < BKEY_U64s, + c, bkey_u64s_too_small, "u64s too small (%u < %zu)", k.k->u64s, BKEY_U64s); if (type >= BKEY_TYPE_NR) @@ -180,8 +178,8 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, bkey_fsck_err_on(k.k->type < KEY_TYPE_MAX && (type == BKEY_TYPE_btree || (flags & BCH_VALIDATE_commit)) && - !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), c, err, - bkey_invalid_type_for_btree, + !(bch2_key_types_allowed[type] & BIT_ULL(k.k->type)), + c, bkey_invalid_type_for_btree, "invalid key type for btree %s (%s)", bch2_btree_node_type_str(type), k.k->type < KEY_TYPE_MAX @@ -189,17 +187,17 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, : "(unknown)"); if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { - bkey_fsck_err_on(k.k->size == 0, c, err, - bkey_extent_size_zero, + bkey_fsck_err_on(k.k->size == 0, + c, bkey_extent_size_zero, "size == 0"); - bkey_fsck_err_on(k.k->size > k.k->p.offset, c, err, - bkey_extent_size_greater_than_offset, + bkey_fsck_err_on(k.k->size > k.k->p.offset, + c, bkey_extent_size_greater_than_offset, "size greater than offset (%u > %llu)", k.k->size, k.k->p.offset); } else { - bkey_fsck_err_on(k.k->size, c, err, - bkey_size_nonzero, + bkey_fsck_err_on(k.k->size, + c, bkey_size_nonzero, "size != 0"); } @@ -207,12 +205,12 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_id btree = type - 1; if (btree_type_has_snapshots(btree)) { - bkey_fsck_err_on(!k.k->p.snapshot, c, err, - bkey_snapshot_zero, + bkey_fsck_err_on(!k.k->p.snapshot, + c, bkey_snapshot_zero, "snapshot == 0"); } else if (!btree_type_has_snapshot_field(btree)) { - bkey_fsck_err_on(k.k->p.snapshot, c, err, - bkey_snapshot_nonzero, + bkey_fsck_err_on(k.k->p.snapshot, + c, bkey_snapshot_nonzero, "nonzero snapshot"); } else { /* @@ -221,34 +219,33 @@ int __bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, */ } - bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), c, err, - bkey_at_pos_max, + bkey_fsck_err_on(bkey_eq(k.k->p, POS_MAX), + c, bkey_at_pos_max, "key at POS_MAX"); } fsck_err: return ret; } -int bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, +int bch2_bkey_validate(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type, - enum bch_validate_flags flags, - struct printbuf *err) + enum bch_validate_flags flags) { - return __bch2_bkey_invalid(c, k, type, flags, err) ?: - bch2_bkey_val_invalid(c, k, flags, err); + return __bch2_bkey_validate(c, k, type, flags) ?: + bch2_bkey_val_validate(c, k, flags); } int bch2_bkey_in_btree_node(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, struct printbuf *err) + struct bkey_s_c k, enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), c, err, - bkey_before_start_of_btree_node, + bkey_fsck_err_on(bpos_lt(k.k->p, b->data->min_key), + c, bkey_before_start_of_btree_node, "key before start of btree node"); - bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), c, err, - bkey_after_end_of_btree_node, + bkey_fsck_err_on(bpos_gt(k.k->p, b->data->max_key), + c, bkey_after_end_of_btree_node, "key past end of btree node"); fsck_err: return ret; diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index baef0722f5fb60c7db294bd4d64018da281f97e9..3df3dd2723a128f8ae11148e5d5ffeb01fe04edf 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -14,15 +14,15 @@ extern const char * const bch2_bkey_types[]; extern const struct bkey_ops bch2_bkey_null_ops; /* - * key_invalid: checks validity of @k, returns 0 if good or -EINVAL if bad. If + * key_validate: checks validity of @k, returns 0 if good or -EINVAL if bad. If * invalid, entire key will be deleted. * * When invalid, error string is returned via @err. @rw indicates whether key is * being read or written; more aggressive checks can be enabled when rw == WRITE. */ struct bkey_ops { - int (*key_invalid)(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err); + int (*key_validate)(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags); void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(struct bkey_s); @@ -48,14 +48,13 @@ static inline const struct bkey_ops *bch2_bkey_type_ops(enum bch_bkey_type type) : &bch2_bkey_null_ops; } -int bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int __bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, enum btree_node_type, - enum bch_validate_flags, struct printbuf *); -int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, - struct bkey_s_c, struct printbuf *); +int bch2_bkey_val_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +int __bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_validate(struct bch_fs *, struct bkey_s_c, enum btree_node_type, + enum bch_validate_flags); +int bch2_bkey_in_btree_node(struct bch_fs *, struct btree *, struct bkey_s_c, + enum bch_validate_flags); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index f5d85b50b6f2f78c784c6ee3f10e4c5e23035171..e52a06d3418ccd0226730e4e42737431a57cb6b6 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -159,6 +159,16 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c) return b; } +void bch2_btree_node_to_freelist(struct bch_fs *c, struct btree *b) +{ + mutex_lock(&c->btree_cache.lock); + list_move(&b->list, &c->btree_cache.freeable); + mutex_unlock(&c->btree_cache.lock); + + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); +} + /* Btree in memory cache - hash table */ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) @@ -736,6 +746,13 @@ out: start_time); memalloc_nofs_restore(flags); + + int ret = bch2_trans_relock(trans); + if (unlikely(ret)) { + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); + } + return b; err: mutex_lock(&bc->lock); @@ -856,6 +873,10 @@ static noinline struct btree *bch2_btree_node_fill(struct btree_trans *trans, bch2_btree_node_read(trans, b, sync); + int ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + if (!sync) return NULL; @@ -974,6 +995,10 @@ retry: bch2_btree_node_wait_on_read(b); + ret = bch2_trans_relock(trans); + if (ret) + return ERR_PTR(ret); + /* * should_be_locked is not set on this path yet, so we need to * relock it specifically: diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index c0eb87a057ccb92c43e3c8c8e2a46e050fe5dc6a..f82064007127203e80033359ec4cd4010cccad79 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -12,6 +12,8 @@ struct btree_iter; void bch2_recalc_btree_reserve(struct bch_fs *); +void bch2_btree_node_to_freelist(struct bch_fs *, struct btree *); + void bch2_btree_node_hash_remove(struct btree_cache *, struct btree *); int __bch2_btree_node_hash_insert(struct btree_cache *, struct btree *); int bch2_btree_node_hash_insert(struct btree_cache *, struct btree *, diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 6cbf2aa6a9479b346f2f64bd650b41f6111fec08..eb3002c4eae7b1615c8a16b17b60e119fca7a8c1 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -741,12 +741,9 @@ fsck_err: static int bch2_mark_superblocks(struct bch_fs *c) { - mutex_lock(&c->sb_lock); gc_pos_set(c, gc_phase(GC_PHASE_sb)); - int ret = bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); - mutex_unlock(&c->sb_lock); - return ret; + return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_gc); } static void bch2_gc_free(struct bch_fs *c) diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 2c424435ca4aaa531e9148e66d6d610a01bbce4d..56ea9a77cd4afdfb2c72e8878d60e068c291494a 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -836,14 +836,13 @@ fsck_err: return ret; } -static int bset_key_invalid(struct bch_fs *c, struct btree *b, - struct bkey_s_c k, - bool updated_range, int rw, - struct printbuf *err) +static int bset_key_validate(struct bch_fs *c, struct btree *b, + struct bkey_s_c k, + bool updated_range, int rw) { - return __bch2_bkey_invalid(c, k, btree_node_type(b), READ, err) ?: - (!updated_range ? bch2_bkey_in_btree_node(c, b, k, err) : 0) ?: - (rw == WRITE ? bch2_bkey_val_invalid(c, k, READ, err) : 0); + return __bch2_bkey_validate(c, k, btree_node_type(b), 0) ?: + (!updated_range ? bch2_bkey_in_btree_node(c, b, k, 0) : 0) ?: + (rw == WRITE ? bch2_bkey_val_validate(c, k, 0) : 0); } static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, @@ -858,12 +857,9 @@ static bool bkey_packed_valid(struct bch_fs *c, struct btree *b, if (!bkeyp_u64s_valid(&b->format, k)) return false; - struct printbuf buf = PRINTBUF; struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - bool ret = __bch2_bkey_invalid(c, u.s_c, btree_node_type(b), READ, &buf); - printbuf_exit(&buf); - return ret; + return !__bch2_bkey_validate(c, u.s_c, btree_node_type(b), BCH_VALIDATE_silent); } static int validate_bset_keys(struct bch_fs *c, struct btree *b, @@ -915,19 +911,11 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b, u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - if (bset_key_invalid(c, b, u.s_c, updated_range, write, &buf)) { - printbuf_reset(&buf); - bset_key_invalid(c, b, u.s_c, updated_range, write, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "invalid bkey: %s", buf.buf); + ret = bset_key_validate(c, b, u.s_c, updated_range, write); + if (ret == -BCH_ERR_fsck_delete_bkey) goto drop_this_key; - } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(b->c.level, b->c.btree_id, version, @@ -1228,23 +1216,10 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, struct bkey tmp; struct bkey_s u = __bkey_disassemble(b, k, &tmp); - printbuf_reset(&buf); - - if (bch2_bkey_val_invalid(c, u.s_c, READ, &buf) || + ret = bch2_bkey_val_validate(c, u.s_c, READ); + if (ret == -BCH_ERR_fsck_delete_bkey || (bch2_inject_invalid_keys && !bversion_cmp(u.k->version, MAX_VERSION))) { - printbuf_reset(&buf); - - prt_printf(&buf, "invalid bkey: "); - bch2_bkey_val_invalid(c, u.s_c, READ, &buf); - prt_printf(&buf, "\n "); - bch2_bkey_val_to_text(&buf, c, u.s_c); - - btree_err(-BCH_ERR_btree_node_read_err_fixable, - c, NULL, b, i, k, - btree_node_bad_bkey, - "%s", buf.buf); - btree_keys_account_key_drop(&b->nr, 0, k); i->u64s = cpu_to_le16(le16_to_cpu(i->u64s) - k->u64s); @@ -1253,6 +1228,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, set_btree_bset_end(b, b->set); continue; } + if (ret) + goto fsck_err; if (u.k->type == KEY_TYPE_btree_ptr_v2) { struct bkey_s_btree_ptr_v2 bp = bkey_s_to_btree_ptr_v2(u); @@ -1767,6 +1744,8 @@ static int __bch2_btree_root_read(struct btree_trans *trans, enum btree_id id, set_btree_node_read_in_flight(b); + /* we can't pass the trans to read_done() for fsck errors, so it must be unlocked */ + bch2_trans_unlock(trans); bch2_btree_node_read(trans, b, true); if (btree_node_read_error(b)) { @@ -1952,18 +1931,14 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - struct printbuf buf = PRINTBUF; bool saw_error; - int ret; - - ret = bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), - BKEY_TYPE_btree, WRITE, &buf); - if (ret) - bch2_fs_inconsistent(c, "invalid btree node key before write: %s", buf.buf); - printbuf_exit(&buf); - if (ret) + int ret = bch2_bkey_validate(c, bkey_i_to_s_c(&b->key), + BKEY_TYPE_btree, WRITE); + if (ret) { + bch2_fs_inconsistent(c, "invalid btree node key before write"); return ret; + } ret = validate_bset_keys(c, b, i, WRITE, false, &saw_error) ?: validate_bset(c, NULL, b, i, b->written, sectors, WRITE, false, &saw_error); diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 36872207f09b22ac597d18b87e18ece7b07a92fc..2e84d22e17bdd616b681625d4b022e6d4e2a3dda 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -1900,6 +1900,7 @@ err: goto out; } +/* Only kept for -tools */ struct btree *bch2_btree_iter_peek_node_and_restart(struct btree_iter *iter) { struct btree *b; @@ -1921,6 +1922,11 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) bch2_trans_verify_not_in_restart(trans); bch2_btree_iter_verify(iter); + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (ret) + goto err; + + struct btree_path *path = btree_iter_path(trans, iter); /* already at end? */ diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index c7725865309c093647a1a7ade9dd0dfc6a458e0d..222b7ce8a901e34e8078985b991ba5850e3767ed 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -569,6 +569,15 @@ static inline struct bkey_s_c bch2_bkey_get_iter(struct btree_trans *trans, bkey_s_c_to_##_type(__bch2_bkey_get_iter(_trans, _iter, \ _btree_id, _pos, _flags, KEY_TYPE_##_type)) +#define bkey_val_copy(_dst_v, _src_k) \ +do { \ + unsigned b = min_t(unsigned, sizeof(*_dst_v), \ + bkey_val_bytes(_src_k.k)); \ + memcpy(_dst_v, _src_k.v, b); \ + if (b < sizeof(*_dst_v)) \ + memset((void *) (_dst_v) + b, 0, sizeof(*_dst_v) - b); \ +} while (0) + static inline int __bch2_bkey_get_val_typed(struct btree_trans *trans, unsigned btree_id, struct bpos pos, unsigned flags, unsigned type, @@ -600,23 +609,35 @@ void bch2_trans_srcu_unlock(struct btree_trans *); u32 bch2_trans_begin(struct btree_trans *); -/* - * XXX - * this does not handle transaction restarts from bch2_btree_iter_next_node() - * correctly - */ -#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _locks_want, _depth, _flags, _b, _ret) \ - for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \ - _start, _locks_want, _depth, _flags); \ - (_b) = bch2_btree_iter_peek_node_and_restart(&(_iter)), \ - !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b); \ - (_b) = bch2_btree_iter_next_node(&(_iter))) +#define __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + _locks_want, _depth, _flags, _b, _do) \ +({ \ + bch2_trans_begin((_trans)); \ + \ + struct btree_iter _iter; \ + bch2_trans_node_iter_init((_trans), &_iter, (_btree_id), \ + _start, _locks_want, _depth, _flags); \ + int _ret3 = 0; \ + do { \ + _ret3 = lockrestart_do((_trans), ({ \ + struct btree *_b = bch2_btree_iter_peek_node(&_iter); \ + if (!_b) \ + break; \ + \ + PTR_ERR_OR_ZERO(_b) ?: (_do); \ + })) ?: \ + lockrestart_do((_trans), \ + PTR_ERR_OR_ZERO(bch2_btree_iter_next_node(&_iter))); \ + } while (!_ret3); \ + \ + bch2_trans_iter_exit((_trans), &(_iter)); \ + _ret3; \ +}) #define for_each_btree_node(_trans, _iter, _btree_id, _start, \ - _flags, _b, _ret) \ - __for_each_btree_node(_trans, _iter, _btree_id, _start, \ - 0, 0, _flags, _b, _ret) + _flags, _b, _do) \ + __for_each_btree_node(_trans, _iter, _btree_id, _start, \ + 0, 0, _flags, _b, _do) static inline struct bkey_s_c bch2_btree_iter_peek_prev_type(struct btree_iter *iter, unsigned flags) diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index f2f2e525460b578b33f592c492c8bab89971ef63..fda7998734cbc1acd00e0c4a171ded929c9f8fda 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -497,11 +497,6 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, struct btree_path path->l[1].b = NULL; - if (bch2_btree_node_relock_notrace(trans, path, 0)) { - path->uptodate = BTREE_ITER_UPTODATE; - return 0; - } - int ret; do { ret = btree_path_traverse_cached_fast(trans, path); @@ -731,6 +726,7 @@ void bch2_btree_key_cache_drop(struct btree_trans *trans, mark_btree_node_locked(trans, path, 0, BTREE_NODE_UNLOCKED); btree_path_set_dirty(path, BTREE_ITER_NEED_TRAVERSE); + path->should_be_locked = false; } static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, @@ -782,6 +778,20 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); + + /* + * Scanning is expensive while a rehash is in progress - most elements + * will be on the new hashtable, if it's in progress + * + * A rehash could still start while we're scanning - that's ok, we'll + * still see most elements. + */ + if (unlikely(tbl->nest)) { + rcu_read_unlock(); + srcu_read_unlock(&c->btree_trans_barrier, srcu_idx); + return SHRINK_STOP; + } + if (bc->shrink_iter >= tbl->size) bc->shrink_iter = 0; start = bc->shrink_iter; @@ -789,7 +799,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink, do { struct rhash_head *pos, *next; - pos = rht_ptr_rcu(rht_bucket(tbl, bc->shrink_iter)); + pos = rht_ptr_rcu(&tbl->buckets[bc->shrink_iter]); while (!rht_is_a_nulls(pos)) { next = rht_dereference_bucket_rcu(pos->next, tbl, bc->shrink_iter); @@ -870,12 +880,22 @@ void bch2_fs_btree_key_cache_exit(struct btree_key_cache *bc) while (atomic_long_read(&bc->nr_keys)) { rcu_read_lock(); tbl = rht_dereference_rcu(bc->table.tbl, &bc->table); - if (tbl) + if (tbl) { + if (tbl->nest) { + /* wait for in progress rehash */ + rcu_read_unlock(); + mutex_lock(&bc->table.mutex); + mutex_unlock(&bc->table.mutex); + rcu_read_lock(); + continue; + } for (i = 0; i < tbl->size; i++) - rht_for_each_entry_rcu(ck, pos, tbl, i, hash) { + while (pos = rht_ptr_rcu(&tbl->buckets[i]), !rht_is_a_nulls(pos)) { + ck = container_of(pos, struct bkey_cached, hash); bkey_cached_evict(bc, ck); list_add(&ck->list, &items); } + } rcu_read_unlock(); } diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h index e6b2cd0dd2c1afc2a21c628763422c2a2a314f69..51d6289b8dee3205ee5e8f8292906a588c92d1c0 100644 --- a/fs/bcachefs/btree_key_cache.h +++ b/fs/bcachefs/btree_key_cache.h @@ -11,13 +11,27 @@ static inline size_t bch2_nr_btree_keys_need_flush(struct bch_fs *c) return max_t(ssize_t, 0, nr_dirty - max_dirty); } -static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +static inline ssize_t __bch2_btree_key_cache_must_wait(struct bch_fs *c) { size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); size_t max_dirty = 4096 + (nr_keys * 3) / 4; - return nr_dirty > max_dirty; + return nr_dirty - max_dirty; +} + +static inline bool bch2_btree_key_cache_must_wait(struct bch_fs *c) +{ + return __bch2_btree_key_cache_must_wait(c) > 0; +} + +static inline bool bch2_btree_key_cache_wait_done(struct bch_fs *c) +{ + size_t nr_dirty = atomic_long_read(&c->btree_key_cache.nr_dirty); + size_t nr_keys = atomic_long_read(&c->btree_key_cache.nr_keys); + size_t max_dirty = 2048 + (nr_keys * 5) / 8; + + return nr_dirty <= max_dirty; } int bch2_btree_key_cache_journal_flush(struct journal *, diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index 0011072263778b3bfab0a870e0f62b486711ca98..b28c649c68389fb5d261019002ba55175a2afe07 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -530,7 +530,7 @@ int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree, bch_verbose(c, "%s(): recovering %s", __func__, buf.buf); printbuf_exit(&buf); - BUG_ON(bch2_bkey_invalid(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0, NULL)); + BUG_ON(bch2_bkey_validate(c, bkey_i_to_s_c(&tmp.k), BKEY_TYPE_btree, 0)); ret = bch2_journal_key_insert(c, btree, level + 1, &tmp.k); if (ret) diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c index cca336fe46e9b44bd887de63c3bbd1aca2f52202..a0101d9c5d83f6b8a9f06a07d4215cb36d85a651 100644 --- a/fs/bcachefs/btree_trans_commit.c +++ b/fs/bcachefs/btree_trans_commit.c @@ -712,7 +712,7 @@ bch2_trans_commit_write_locked(struct btree_trans *trans, unsigned flags, a->k.version = journal_pos_to_bversion(&trans->journal_res, (u64 *) entry - (u64 *) trans->journal_entries); BUG_ON(bversion_zero(a->k.version)); - ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false); + ret = bch2_accounting_mem_mod_locked(trans, accounting_i_to_s_c(a), false, false); if (ret) goto revert_fs_usage; } @@ -798,7 +798,7 @@ revert_fs_usage: struct bkey_s_accounting a = bkey_i_to_s_accounting(entry2->start); bch2_accounting_neg(a); - bch2_accounting_mem_mod_locked(trans, a.c, false); + bch2_accounting_mem_mod_locked(trans, a.c, false, false); bch2_accounting_neg(a); } percpu_up_read(&c->mark_lock); @@ -818,50 +818,6 @@ static noinline void bch2_drop_overwrites_from_journal(struct btree_trans *trans bch2_journal_key_overwritten(trans->c, i->btree_id, i->level, i->k->k.p); } -static noinline int bch2_trans_commit_bkey_invalid(struct btree_trans *trans, - enum bch_validate_flags flags, - struct btree_insert_entry *i, - struct printbuf *err) -{ - struct bch_fs *c = trans->c; - - printbuf_reset(err); - prt_printf(err, "invalid bkey on insert from %s -> %ps\n", - trans->fn, (void *) i->ip_allocated); - printbuf_indent_add(err, 2); - - bch2_bkey_val_to_text(err, c, bkey_i_to_s_c(i->k)); - prt_newline(err); - - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type, flags, err); - bch2_print_string_as_lines(KERN_ERR, err->buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - -static noinline int bch2_trans_commit_journal_entry_invalid(struct btree_trans *trans, - struct jset_entry *i) -{ - struct bch_fs *c = trans->c; - struct printbuf buf = PRINTBUF; - - prt_printf(&buf, "invalid bkey on insert from %s\n", trans->fn); - printbuf_indent_add(&buf, 2); - - bch2_journal_entry_to_text(&buf, c, i); - prt_newline(&buf); - - bch2_print_string_as_lines(KERN_ERR, buf.buf); - - bch2_inconsistent_error(c); - bch2_dump_trans_updates(trans); - - return -EINVAL; -} - static int bch2_trans_commit_journal_pin_flush(struct journal *j, struct journal_entry_pin *_pin, u64 seq) { @@ -927,7 +883,7 @@ static inline int do_bch2_trans_commit(struct btree_trans *trans, unsigned flags static int journal_reclaim_wait_done(struct bch_fs *c) { int ret = bch2_journal_error(&c->journal) ?: - !bch2_btree_key_cache_must_wait(c); + bch2_btree_key_cache_wait_done(c); if (!ret) journal_reclaim_kick(&c->journal); @@ -973,9 +929,13 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags, bch2_trans_unlock(trans); trace_and_count(c, trans_blocked_journal_reclaim, trans, trace_ip); + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], true); wait_event_freezable(c->journal.reclaim_wait, (ret = journal_reclaim_wait_done(c))); + + track_event_change(&c->times[BCH_TIME_blocked_key_cache_flush], false); + if (ret < 0) break; @@ -1060,20 +1020,19 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) goto out_reset; trans_for_each_update(trans, i) { - struct printbuf buf = PRINTBUF; enum bch_validate_flags invalid_flags = 0; if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), - i->bkey_type, invalid_flags, &buf))) - ret = bch2_trans_commit_bkey_invalid(trans, invalid_flags, i, &buf); - btree_insert_entry_checks(trans, i); - printbuf_exit(&buf); - - if (ret) + ret = bch2_bkey_validate(c, bkey_i_to_s_c(i->k), + i->bkey_type, invalid_flags); + if (unlikely(ret)){ + bch2_trans_inconsistent(trans, "invalid bkey on insert from %s -> %ps\n", + trans->fn, (void *) i->ip_allocated); return ret; + } + btree_insert_entry_checks(trans, i); } for (struct jset_entry *i = trans->journal_entries; @@ -1084,13 +1043,14 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags) if (!(flags & BCH_TRANS_COMMIT_no_journal_res)) invalid_flags |= BCH_VALIDATE_write|BCH_VALIDATE_commit; - if (unlikely(bch2_journal_entry_validate(c, NULL, i, - bcachefs_metadata_version_current, - CPU_BIG_ENDIAN, invalid_flags))) - ret = bch2_trans_commit_journal_entry_invalid(trans, i); - - if (ret) + ret = bch2_journal_entry_validate(c, NULL, i, + bcachefs_metadata_version_current, + CPU_BIG_ENDIAN, invalid_flags); + if (unlikely(ret)) { + bch2_trans_inconsistent(trans, "invalid journal entry on insert from %s\n", + trans->fn); return ret; + } } if (unlikely(!test_bit(BCH_FS_may_go_rw, &c->flags))) { diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 31ee50184be2369c110c4e6d9352d6ecd6ffd09f..8fd112026e7a34d7cf4782d0e04c923c4a4a5081 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -317,6 +317,12 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, : 0; int ret; + b = bch2_btree_node_mem_alloc(trans, interior_node); + if (IS_ERR(b)) + return b; + + BUG_ON(b->ob.nr); + mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { struct btree_alloc *a = @@ -325,10 +331,9 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, obs = a->ob; bkey_copy(&tmp.k, &a->k); mutex_unlock(&c->btree_reserve_cache_lock); - goto mem_alloc; + goto out; } mutex_unlock(&c->btree_reserve_cache_lock); - retry: ret = bch2_alloc_sectors_start_trans(trans, c->opts.metadata_target ?: @@ -341,7 +346,7 @@ retry: c->opts.metadata_replicas_required), watermark, 0, cl, &wp); if (unlikely(ret)) - return ERR_PTR(ret); + goto err; if (wp->sectors_free < btree_sectors(c)) { struct open_bucket *ob; @@ -360,19 +365,16 @@ retry: bch2_open_bucket_get(c, wp, &obs); bch2_alloc_sectors_done(c, wp); -mem_alloc: - b = bch2_btree_node_mem_alloc(trans, interior_node); - six_unlock_write(&b->c.lock); - six_unlock_intent(&b->c.lock); - - /* we hold cannibalize_lock: */ - BUG_ON(IS_ERR(b)); - BUG_ON(b->ob.nr); - +out: bkey_copy(&b->key, &tmp.k); b->ob = obs; + six_unlock_write(&b->c.lock); + six_unlock_intent(&b->c.lock); return b; +err: + bch2_btree_node_to_freelist(c, b); + return ERR_PTR(ret); } static struct btree *bch2_btree_node_alloc(struct btree_update *as, @@ -1264,7 +1266,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path, ret = bch2_btree_reserve_get(trans, as, nr_nodes, flags, &cl); bch2_trans_unlock(trans); - closure_sync(&cl); + bch2_wait_on_allocator(c, &cl); } while (bch2_err_matches(ret, BCH_ERR_operation_blocked)); } @@ -1364,18 +1366,10 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, if (unlikely(!test_bit(JOURNAL_replay_done, &c->journal.flags))) bch2_journal_key_overwritten(c, b->c.btree_id, b->c.level, insert->k.p); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf) ?: - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf)) { - printbuf_reset(&buf); - prt_printf(&buf, "inserting invalid bkey\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(insert)); - prt_printf(&buf, "\n "); - bch2_bkey_invalid(c, bkey_i_to_s_c(insert), - btree_node_type(b), WRITE, &buf); - bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), &buf); - - bch2_fs_inconsistent(c, "%s", buf.buf); + if (bch2_bkey_validate(c, bkey_i_to_s_c(insert), + btree_node_type(b), BCH_VALIDATE_write) ?: + bch2_bkey_in_btree_node(c, b, bkey_i_to_s_c(insert), BCH_VALIDATE_write)) { + bch2_fs_inconsistent(c, "%s: inserting invalid bkey", __func__); dump_stack(); } @@ -2447,6 +2441,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite } new_hash = bch2_btree_node_mem_alloc(trans, false); + ret = PTR_ERR_OR_ZERO(new_hash); + if (ret) + goto err; } path->intent_ref++; @@ -2454,14 +2451,9 @@ int bch2_btree_node_update_key(struct btree_trans *trans, struct btree_iter *ite commit_flags, skip_triggers); --path->intent_ref; - if (new_hash) { - mutex_lock(&c->btree_cache.lock); - list_move(&new_hash->list, &c->btree_cache.freeable); - mutex_unlock(&c->btree_cache.lock); - - six_unlock_write(&new_hash->c.lock); - six_unlock_intent(&new_hash->c.lock); - } + if (new_hash) + bch2_btree_node_to_freelist(c, new_hash); +err: closure_sync(&cl); bch2_btree_cache_cannibalize_unlock(trans); return ret; @@ -2530,6 +2522,10 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id b = bch2_btree_node_mem_alloc(trans, false); bch2_btree_cache_cannibalize_unlock(trans); + ret = PTR_ERR_OR_ZERO(b); + if (ret) + return ret; + set_btree_node_fake(b); set_btree_node_need_rewrite(b); b->c.level = level; @@ -2561,7 +2557,7 @@ int bch2_btree_root_alloc_fake_trans(struct btree_trans *trans, enum btree_id id void bch2_btree_root_alloc_fake(struct bch_fs *c, enum btree_id id, unsigned level) { - bch2_trans_run(c, bch2_btree_root_alloc_fake_trans(trans, id, level)); + bch2_trans_run(c, lockrestart_do(trans, bch2_btree_root_alloc_fake_trans(trans, id, level))); } static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update *as) diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 2650a0d246636959caf83b2f27445d47aec8094c..a2274429e7f4ad6cfb7517d746908caf083117eb 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -71,17 +71,21 @@ bch2_fs_usage_read_short(struct bch_fs *c) return ret; } -void bch2_dev_usage_to_text(struct printbuf *out, struct bch_dev_usage *usage) +void bch2_dev_usage_to_text(struct printbuf *out, + struct bch_dev *ca, + struct bch_dev_usage *usage) { prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); for (unsigned i = 0; i < BCH_DATA_NR; i++) { bch2_prt_data_type(out, i); prt_printf(out, "\t%llu\r%llu\r%llu\r\n", - usage->d[i].buckets, - usage->d[i].sectors, - usage->d[i].fragmented); + usage->d[i].buckets, + usage->d[i].sectors, + usage->d[i].fragmented); } + + prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); } static int bch2_check_fix_ptr(struct btree_trans *trans, @@ -695,7 +699,8 @@ err: static int __trigger_extent(struct btree_trans *trans, enum btree_id btree_id, unsigned level, struct bkey_s_c k, - enum btree_iter_update_trigger_flags flags) + enum btree_iter_update_trigger_flags flags, + s64 *replicas_sectors) { bool gc = flags & BTREE_TRIGGER_gc; struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); @@ -704,7 +709,6 @@ static int __trigger_extent(struct btree_trans *trans, enum bch_data_type data_type = bkey_is_btree_ptr(k.k) ? BCH_DATA_btree : BCH_DATA_user; - s64 replicas_sectors = 0; int ret = 0; struct disk_accounting_pos acc_replicas_key = { @@ -735,7 +739,7 @@ static int __trigger_extent(struct btree_trans *trans, if (ret) return ret; } else if (!p.has_ec) { - replicas_sectors += disk_sectors; + *replicas_sectors += disk_sectors; acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev; } else { ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); @@ -773,7 +777,7 @@ static int __trigger_extent(struct btree_trans *trans, } if (acc_replicas_key.replicas.nr_devs) { - ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -783,7 +787,7 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_snapshot, .snapshot.id = k.k->p.snapshot, }; - ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc); if (ret) return ret; } @@ -803,16 +807,21 @@ static int __trigger_extent(struct btree_trans *trans, .type = BCH_DISK_ACCOUNTING_btree, .btree.id = btree_id, }; - ret = bch2_disk_accounting_mod(trans, &acc_btree_key, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc); if (ret) return ret; - } - - if (bch2_bkey_rebalance_opts(k)) { - struct disk_accounting_pos acc = { - .type = BCH_DISK_ACCOUNTING_rebalance_work, + } else { + bool insert = !(flags & BTREE_TRIGGER_overwrite); + struct disk_accounting_pos acc_inum_key = { + .type = BCH_DISK_ACCOUNTING_inum, + .inum.inum = k.k->p.inode, + }; + s64 v[3] = { + insert ? 1 : -1, + insert ? k.k->size : -((s64) k.k->size), + *replicas_sectors, }; - ret = bch2_disk_accounting_mod(trans, &acc, &replicas_sectors, 1, gc); + ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); if (ret) return ret; } @@ -825,6 +834,7 @@ int bch2_trigger_extent(struct btree_trans *trans, struct bkey_s_c old, struct bkey_s new, enum btree_iter_update_trigger_flags flags) { + struct bch_fs *c = trans->c; struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; @@ -840,21 +850,53 @@ int bch2_trigger_extent(struct btree_trans *trans, new_ptrs_bytes)) return 0; - if (flags & BTREE_TRIGGER_transactional) { - struct bch_fs *c = trans->c; - int mod = (int) bch2_bkey_needs_rebalance(c, new.s_c) - - (int) bch2_bkey_needs_rebalance(c, old); + if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { + s64 old_replicas_sectors = 0, new_replicas_sectors = 0; + + if (old.k->type) { + int ret = __trigger_extent(trans, btree, level, old, + flags & ~BTREE_TRIGGER_insert, + &old_replicas_sectors); + if (ret) + return ret; + } - if (mod) { + if (new.k->type) { + int ret = __trigger_extent(trans, btree, level, new.s_c, + flags & ~BTREE_TRIGGER_overwrite, + &new_replicas_sectors); + if (ret) + return ret; + } + + int need_rebalance_delta = 0; + s64 need_rebalance_sectors_delta = 0; + + s64 s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta -= s != 0; + need_rebalance_sectors_delta -= s; + + s = bch2_bkey_sectors_need_rebalance(c, old); + need_rebalance_delta += s != 0; + need_rebalance_sectors_delta += s; + + if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, - new.k->p, mod > 0); + new.k->p, need_rebalance_delta > 0); if (ret) return ret; } - } - if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) - return trigger_run_overwrite_then_insert(__trigger_extent, trans, btree, level, old, new, flags); + if (need_rebalance_sectors_delta) { + struct disk_accounting_pos acc = { + .type = BCH_DISK_ACCOUNTING_rebalance_work, + }; + int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1, + flags & BTREE_TRIGGER_gc); + if (ret) + return ret; + } + } return 0; } @@ -897,7 +939,6 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, enum bch_data_type type, unsigned sectors) { - struct bch_fs *c = trans->c; struct btree_iter iter; int ret = 0; @@ -907,7 +948,7 @@ static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, return PTR_ERR(a); if (a->v.data_type && type && a->v.data_type != type) { - bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, + bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, bucket_metadata_type_mismatch, "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" "while marking %s", @@ -1028,13 +1069,18 @@ static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, enum btree_iter_update_trigger_flags flags) { - struct bch_sb_layout *layout = &ca->disk_sb.sb->layout; + struct bch_fs *c = trans->c; + + mutex_lock(&c->sb_lock); + struct bch_sb_layout layout = ca->disk_sb.sb->layout; + mutex_unlock(&c->sb_lock); + u64 bucket = 0; unsigned i, bucket_sectors = 0; int ret; - for (i = 0; i < layout->nr_superblocks; i++) { - u64 offset = le64_to_cpu(layout->sb_offset[i]); + for (i = 0; i < layout.nr_superblocks; i++) { + u64 offset = le64_to_cpu(layout.sb_offset[i]); if (offset == BCH_SB_SECTOR) { ret = bch2_trans_mark_metadata_sectors(trans, ca, @@ -1045,7 +1091,7 @@ static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *c } ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, - offset + (1 << layout->sb_max_size_bits), + offset + (1 << layout.sb_max_size_bits), BCH_DATA_sb, &bucket, &bucket_sectors, flags); if (ret) return ret; diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 2d35eeb24a2d693e5117b2ebd11d0cf07a338490..edbdffd508fc3b0f1c78a47b33d554b043bc3ce7 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -212,7 +212,7 @@ static inline struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *ca) return ret; } -void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev_usage *); +void bch2_dev_usage_to_text(struct printbuf *, struct bch_dev *, struct bch_dev_usage *); static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_watermark watermark) { diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c index ec1b636ef78d075d1c2b6a9dd2b610a5ba8f274c..f9fb150eda706cb670a38ef9e167a896ad31e203 100644 --- a/fs/bcachefs/buckets_waiting_for_journal.c +++ b/fs/bcachefs/buckets_waiting_for_journal.c @@ -93,7 +93,7 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, .dev_bucket = (u64) dev << 56 | bucket, .journal_seq = journal_seq, }; - size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0; + size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0, nr_rehashes_this_size = 0; int ret = 0; mutex_lock(&b->lock); @@ -106,8 +106,8 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, for (i = 0; i < size; i++) nr_elements += t->d[i].journal_seq > flushed_seq; - new_bits = t->bits + (nr_elements * 3 > size); - + new_bits = ilog2(roundup_pow_of_two(nr_elements * 3)); +realloc: n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL); if (!n) { ret = -BCH_ERR_ENOMEM_buckets_waiting_for_journal_set; @@ -115,7 +115,16 @@ int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b, } retry_rehash: + if (nr_rehashes_this_size == 3) { + new_bits++; + nr_rehashes_this_size = 0; + kvfree(n); + goto realloc; + } + nr_rehashes++; + nr_rehashes_this_size++; + bucket_table_init(n, new_bits); tmp = new; diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c index 0087b8555ead3c20459ac5bbaaf883e4c389901e..65176d51b502e615a522d7533a2efcc986979fe0 100644 --- a/fs/bcachefs/data_update.c +++ b/fs/bcachefs/data_update.c @@ -20,6 +20,76 @@ #include "subvolume.h" #include "trace.h" +static void bkey_put_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) + bch2_dev_put(bch2_dev_have_ref(c, ptr->dev)); +} + +static bool bkey_get_dev_refs(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + if (!bch2_dev_tryget(c, ptr->dev)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); + } + return false; + } + } + return true; +} + +static void bkey_nocow_unlock(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } +} + +static bool bkey_nocow_lock(struct bch_fs *c, struct moving_context *ctxt, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); + struct bpos bucket = PTR_BUCKET_POS(ca, ptr); + + if (ctxt) { + bool locked; + + move_ctxt_wait_event(ctxt, + (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) || + list_empty(&ctxt->ios)); + + if (!locked) + bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); + } else { + if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { + bkey_for_each_ptr(ptrs, ptr2) { + if (ptr2 == ptr) + break; + + bucket = PTR_BUCKET_POS(ca, ptr2); + bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); + } + return false; + } + } + } + return true; +} + static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k) { if (trace_move_extent_finish_enabled()) { @@ -250,10 +320,8 @@ restart_drop_extra_replicas: * it's been hard to reproduce, so this should give us some more * information when it does occur: */ - struct printbuf err = PRINTBUF; - int invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), 0, &err); - printbuf_exit(&err); - + int invalid = bch2_bkey_validate(c, bkey_i_to_s_c(insert), __btree_node_type(0, m->btree_id), + BCH_VALIDATE_commit); if (invalid) { struct printbuf buf = PRINTBUF; @@ -357,17 +425,11 @@ void bch2_data_update_read_done(struct data_update *m, void bch2_data_update_exit(struct data_update *update) { struct bch_fs *c = update->op.c; - struct bkey_ptrs_c ptrs = - bch2_bkey_ptrs_c(bkey_i_to_s_c(update->k.k)); - - bkey_for_each_ptr(ptrs, ptr) { - struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev); - if (c->opts.nocow_enabled) - bch2_bucket_nocow_unlock(&c->nocow_locks, - PTR_BUCKET_POS(ca, ptr), 0); - bch2_dev_put(ca); - } + struct bkey_s_c k = bkey_i_to_s_c(update->k.k); + if (c->opts.nocow_enabled) + bkey_nocow_unlock(c, k); + bkey_put_dev_refs(c, k); bch2_bkey_buf_exit(&update->k, c); bch2_disk_reservation_put(c, &update->op.res); bch2_bio_free_pages_pool(c, &update->op.wbio.bio); @@ -477,6 +539,9 @@ void bch2_data_update_opts_to_text(struct printbuf *out, struct bch_fs *c, bch2_compression_opt_to_text(out, background_compression(*io_opts)); prt_newline(out); + prt_str(out, "opts.replicas:\t"); + prt_u64(out, io_opts->data_replicas); + prt_str(out, "extra replicas:\t"); prt_u64(out, data_opts->extra_replicas); } @@ -545,7 +610,6 @@ int bch2_data_update_init(struct btree_trans *trans, const union bch_extent_entry *entry; struct extent_ptr_decoded p; unsigned i, reserve_sectors = k.k->size * data_opts.extra_replicas; - unsigned ptrs_locked = 0; int ret = 0; /* @@ -556,6 +620,15 @@ int bch2_data_update_init(struct btree_trans *trans, if (unlikely(k.k->p.snapshot && !bch2_snapshot_equiv(c, k.k->p.snapshot))) return -BCH_ERR_data_update_done; + if (!bkey_get_dev_refs(c, k)) + return -BCH_ERR_data_update_done; + + if (c->opts.nocow_enabled && + !bkey_nocow_lock(c, ctxt, k)) { + bkey_put_dev_refs(c, k); + return -BCH_ERR_nocow_lock_blocked; + } + bch2_bkey_buf_init(&m->k); bch2_bkey_buf_reassemble(&m->k, c, k); m->btree_id = btree_id; @@ -577,40 +650,24 @@ int bch2_data_update_init(struct btree_trans *trans, m->op.compression_opt = background_compression(io_opts); m->op.watermark = m->data_opts.btree_insert_flags & BCH_WATERMARK_MASK; - bkey_for_each_ptr(ptrs, ptr) { - if (!bch2_dev_tryget(c, ptr->dev)) { - bkey_for_each_ptr(ptrs, ptr2) { - if (ptr2 == ptr) - break; - bch2_dev_put(bch2_dev_have_ref(c, ptr2->dev)); - } - return -BCH_ERR_data_update_done; - } - } - unsigned durability_have = 0, durability_removing = 0; i = 0; bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - bool locked; - - rcu_read_lock(); - if (((1U << i) & m->data_opts.rewrite_ptrs)) { - BUG_ON(p.ptr.cached); - - if (crc_is_compressed(p.crc)) - reserve_sectors += k.k->size; - - m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); - durability_removing += bch2_extent_ptr_desired_durability(c, &p); - } else if (!p.ptr.cached && - !((1U << i) & m->data_opts.kill_ptrs)) { - bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); - durability_have += bch2_extent_ptr_durability(c, &p); + if (!p.ptr.cached) { + rcu_read_lock(); + if (BIT(i) & m->data_opts.rewrite_ptrs) { + if (crc_is_compressed(p.crc)) + reserve_sectors += k.k->size; + + m->op.nr_replicas += bch2_extent_ptr_desired_durability(c, &p); + durability_removing += bch2_extent_ptr_desired_durability(c, &p); + } else if (!(BIT(i) & m->data_opts.kill_ptrs)) { + bch2_dev_list_add_dev(&m->op.devs_have, p.ptr.dev); + durability_have += bch2_extent_ptr_durability(c, &p); + } + rcu_read_unlock(); } - rcu_read_unlock(); /* * op->csum_type is normally initialized from the fs/file's @@ -625,24 +682,6 @@ int bch2_data_update_init(struct btree_trans *trans, if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) m->op.incompressible = true; - if (c->opts.nocow_enabled) { - if (ctxt) { - move_ctxt_wait_event(ctxt, - (locked = bch2_bucket_nocow_trylock(&c->nocow_locks, - bucket, 0)) || - list_empty(&ctxt->ios)); - - if (!locked) - bch2_bucket_nocow_lock(&c->nocow_locks, bucket, 0); - } else { - if (!bch2_bucket_nocow_trylock(&c->nocow_locks, bucket, 0)) { - ret = -BCH_ERR_nocow_lock_blocked; - goto err; - } - } - ptrs_locked |= (1U << i); - } - i++; } @@ -656,16 +695,6 @@ int bch2_data_update_init(struct btree_trans *trans, * Increasing replication is an explicit operation triggered by * rereplicate, currently, so that users don't get an unexpected -ENOSPC */ - if (!(m->data_opts.write_flags & BCH_WRITE_CACHED) && - !durability_required) { - m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; - m->data_opts.rewrite_ptrs = 0; - /* if iter == NULL, it's just a promote */ - if (iter) - ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); - goto done; - } - m->op.nr_replicas = min(durability_removing, durability_required) + m->data_opts.extra_replicas; @@ -677,48 +706,38 @@ int bch2_data_update_init(struct btree_trans *trans, if (!(durability_have + durability_removing)) m->op.nr_replicas = max((unsigned) m->op.nr_replicas, 1); - if (!m->op.nr_replicas) { - struct printbuf buf = PRINTBUF; + m->op.nr_replicas_required = m->op.nr_replicas; - bch2_data_update_to_text(&buf, m); - WARN(1, "trying to move an extent, but nr_replicas=0\n%s", buf.buf); - printbuf_exit(&buf); - ret = -BCH_ERR_data_update_done; - goto done; + /* + * It might turn out that we don't need any new replicas, if the + * replicas or durability settings have been changed since the extent + * was written: + */ + if (!m->op.nr_replicas) { + m->data_opts.kill_ptrs |= m->data_opts.rewrite_ptrs; + m->data_opts.rewrite_ptrs = 0; + /* if iter == NULL, it's just a promote */ + if (iter) + ret = bch2_extent_drop_ptrs(trans, iter, k, m->data_opts); + goto out; } - m->op.nr_replicas_required = m->op.nr_replicas; - if (reserve_sectors) { ret = bch2_disk_reservation_add(c, &m->op.res, reserve_sectors, m->data_opts.extra_replicas ? 0 : BCH_DISK_RESERVATION_NOFAIL); if (ret) - goto err; + goto out; } if (bkey_extent_is_unwritten(k)) { bch2_update_unwritten_extent(trans, m); - goto done; + goto out; } return 0; -err: - i = 0; - bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { - struct bch_dev *ca = bch2_dev_have_ref(c, p.ptr.dev); - struct bpos bucket = PTR_BUCKET_POS(ca, &p.ptr); - if ((1U << i) & ptrs_locked) - bch2_bucket_nocow_unlock(&c->nocow_locks, bucket, 0); - bch2_dev_put(ca); - i++; - } - - bch2_bkey_buf_exit(&m->k, c); - bch2_bio_free_pages_pool(c, &m->op.wbio.bio); - return ret; -done: +out: bch2_data_update_exit(m); return ret ?: -BCH_ERR_data_update_done; } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index ebabab171fe5eac2fe665094f1070e65de5e891c..45aec1afdb0e31195ffc76a19cc9d048669be299 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -397,47 +397,27 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, size_t size, loff_t *ppos) { struct dump_iter *i = file->private_data; - struct btree_trans *trans; - struct btree_iter iter; - struct btree *b; - ssize_t ret; i->ubuf = buf; i->size = size; i->ret = 0; - ret = flush_buf(i); + ssize_t ret = flush_buf(i); if (ret) return ret; if (bpos_eq(SPOS_MAX, i->from)) return i->ret; - trans = bch2_trans_get(i->c); -retry: - bch2_trans_begin(trans); - - for_each_btree_node(trans, iter, i->id, i->from, 0, b, ret) { - bch2_btree_node_to_text(&i->buf, i->c, b); - i->from = !bpos_eq(SPOS_MAX, b->key.k.p) - ? bpos_successor(b->key.k.p) - : b->key.k.p; - - ret = drop_locks_do(trans, flush_buf(i)); - if (ret) - break; - } - bch2_trans_iter_exit(trans, &iter); - - if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) - goto retry; - - bch2_trans_put(trans); - - if (!ret) - ret = flush_buf(i); + return bch2_trans_run(i->c, + for_each_btree_node(trans, iter, i->id, i->from, 0, b, ({ + bch2_btree_node_to_text(&i->buf, i->c, b); + i->from = !bpos_eq(SPOS_MAX, b->key.k.p) + ? bpos_successor(b->key.k.p) + : b->key.k.p; - return ret ?: i->ret; + drop_locks_do(trans, flush_buf(i)); + }))) ?: i->ret; } static const struct file_operations btree_format_debug_ops = { diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index d743da89308ef768a400100534cd7371872ce9b5..32bfdf19289a9dc4789ade6da8e9348dcb82afa9 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -100,20 +100,19 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { .is_visible = dirent_is_visible, }; -int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_dirent_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); struct qstr d_name = bch2_dirent_get_name(d); int ret = 0; - bkey_fsck_err_on(!d_name.len, c, err, - dirent_empty_name, + bkey_fsck_err_on(!d_name.len, + c, dirent_empty_name, "empty name"); - bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), c, err, - dirent_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > dirent_val_u64s(d_name.len), + c, dirent_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), dirent_val_u64s(d_name.len)); @@ -121,27 +120,27 @@ int bch2_dirent_invalid(struct bch_fs *c, struct bkey_s_c k, * Check new keys don't exceed the max length * (older keys may be larger.) */ - bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, c, err, - dirent_name_too_long, + bkey_fsck_err_on((flags & BCH_VALIDATE_commit) && d_name.len > BCH_NAME_MAX, + c, dirent_name_too_long, "dirent name too big (%u > %u)", d_name.len, BCH_NAME_MAX); - bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), c, err, - dirent_name_embedded_nul, + bkey_fsck_err_on(d_name.len != strnlen(d_name.name, d_name.len), + c, dirent_name_embedded_nul, "dirent has stray data after name's NUL"); bkey_fsck_err_on((d_name.len == 1 && !memcmp(d_name.name, ".", 1)) || - (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), c, err, - dirent_name_dot_or_dotdot, + (d_name.len == 2 && !memcmp(d_name.name, "..", 2)), + c, dirent_name_dot_or_dotdot, "invalid name"); - bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), c, err, - dirent_name_has_slash, + bkey_fsck_err_on(memchr(d_name.name, '/', d_name.len), + c, dirent_name_has_slash, "name with /"); bkey_fsck_err_on(d.v->d_type != DT_SUBVOL && - le64_to_cpu(d.v->d_inum) == d.k->p.inode, c, err, - dirent_to_itself, + le64_to_cpu(d.v->d_inum) == d.k->p.inode, + c, dirent_to_itself, "dirent points to own directory"); fsck_err: return ret; diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 24037e6e0a0948d0cb3cc6c78e1f530cd3f79e6e..8945145865c53b4b46ee82d86b55689f5fed6ab4 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -7,12 +7,11 @@ enum bch_validate_flags; extern const struct bch_hash_desc bch2_dirent_hash_desc; -int bch2_dirent_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_dirent_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_dirent ((struct bkey_ops) { \ - .key_invalid = bch2_dirent_invalid, \ + .key_validate = bch2_dirent_validate, \ .val_to_text = bch2_dirent_to_text, \ .min_val_size = 16, \ }) diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c index dcdd59249c23eb5f896772da06f5d87a55a74fd4..e972e2bca546a9335320948287b1cc1b6c7b9f60 100644 --- a/fs/bcachefs/disk_accounting.c +++ b/fs/bcachefs/disk_accounting.c @@ -114,11 +114,73 @@ int bch2_mod_dev_cached_sectors(struct btree_trans *trans, return bch2_disk_accounting_mod(trans, &acc, §ors, 1, gc); } -int bch2_accounting_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +static inline bool is_zero(char *start, char *end) { - return 0; + BUG_ON(start > end); + + for (; start < end; start++) + if (*start) + return false; + return true; +} + +#define field_end(p, member) (((void *) (&p.member)) + sizeof(p.member)) + +int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) +{ + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + void *end = &acc_k + 1; + int ret = 0; + + switch (acc_k.type) { + case BCH_DISK_ACCOUNTING_nr_inodes: + end = field_end(acc_k, nr_inodes); + break; + case BCH_DISK_ACCOUNTING_persistent_reserved: + end = field_end(acc_k, persistent_reserved); + break; + case BCH_DISK_ACCOUNTING_replicas: + bkey_fsck_err_on(!acc_k.replicas.nr_devs, + c, accounting_key_replicas_nr_devs_0, + "accounting key replicas entry with nr_devs=0"); + + bkey_fsck_err_on(acc_k.replicas.nr_required > acc_k.replicas.nr_devs || + (acc_k.replicas.nr_required > 1 && + acc_k.replicas.nr_required == acc_k.replicas.nr_devs), + c, accounting_key_replicas_nr_required_bad, + "accounting key replicas entry with bad nr_required"); + + for (unsigned i = 0; i + 1 < acc_k.replicas.nr_devs; i++) + bkey_fsck_err_on(acc_k.replicas.devs[i] >= acc_k.replicas.devs[i + 1], + c, accounting_key_replicas_devs_unsorted, + "accounting key replicas entry with unsorted devs"); + + end = (void *) &acc_k.replicas + replicas_entry_bytes(&acc_k.replicas); + break; + case BCH_DISK_ACCOUNTING_dev_data_type: + end = field_end(acc_k, dev_data_type); + break; + case BCH_DISK_ACCOUNTING_compression: + end = field_end(acc_k, compression); + break; + case BCH_DISK_ACCOUNTING_snapshot: + end = field_end(acc_k, snapshot); + break; + case BCH_DISK_ACCOUNTING_btree: + end = field_end(acc_k, btree); + break; + case BCH_DISK_ACCOUNTING_rebalance_work: + end = field_end(acc_k, rebalance_work); + break; + } + + bkey_fsck_err_on(!is_zero(end, (void *) (&acc_k + 1)), + c, accounting_key_junk_at_end, + "junk at end of accounting key"); +fsck_err: + return ret; } void bch2_accounting_key_to_text(struct printbuf *out, struct disk_accounting_pos *k) @@ -465,6 +527,9 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct disk_accounting_pos acc_k; bpos_to_disk_accounting_pos(&acc_k, e->pos); + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + u64 src_v[BCH_ACCOUNTING_MAX_COUNTERS]; u64 dst_v[BCH_ACCOUNTING_MAX_COUNTERS]; @@ -501,7 +566,7 @@ int bch2_gc_accounting_done(struct bch_fs *c) struct { __BKEY_PADDED(k, BCH_ACCOUNTING_MAX_COUNTERS); } k_i; accounting_key_init(&k_i.k, &acc_k, src_v, nr); - bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false); + bch2_accounting_mem_mod_locked(trans, bkey_i_to_s_c_accounting(&k_i.k), false, false); preempt_disable(); struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); @@ -530,7 +595,7 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k) return 0; percpu_down_read(&c->mark_lock); - int ret = __bch2_accounting_mem_mod(c, bkey_s_c_to_accounting(k), false); + int ret = bch2_accounting_mem_mod_locked(trans, bkey_s_c_to_accounting(k), false, true); percpu_up_read(&c->mark_lock); if (bch2_accounting_key_is_zero(bkey_s_c_to_accounting(k)) && @@ -697,6 +762,15 @@ void bch2_verify_accounting_clean(struct bch_fs *c) struct bkey_s_c_accounting a = bkey_s_c_to_accounting(k); unsigned nr = bch2_accounting_counters(k.k); + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, k.k->p); + + if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR) + continue; + + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + continue; + bch2_accounting_mem_read(c, k.k->p, v, nr); if (memcmp(a.v->d, v, nr * sizeof(u64))) { @@ -712,9 +786,6 @@ void bch2_verify_accounting_clean(struct bch_fs *c) mismatch = true; } - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); - switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: base.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h index 3d3f25e08b69614fd4fd817072db53ec8eb85202..f29fd0dd9581fb9b70cf4201c553b4bf6d0faa7f 100644 --- a/fs/bcachefs/disk_accounting.h +++ b/fs/bcachefs/disk_accounting.h @@ -82,14 +82,13 @@ int bch2_disk_accounting_mod(struct btree_trans *, struct disk_accounting_pos *, s64 *, unsigned, bool); int bch2_mod_dev_cached_sectors(struct btree_trans *, unsigned, s64, bool); -int bch2_accounting_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_accounting_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_accounting_key_to_text(struct printbuf *, struct disk_accounting_pos *); void bch2_accounting_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_accounting_swab(struct bkey_s); #define bch2_bkey_ops_accounting ((struct bkey_ops) { \ - .key_invalid = bch2_accounting_invalid, \ + .key_validate = bch2_accounting_validate, \ .val_to_text = bch2_accounting_to_text, \ .swab = bch2_accounting_swab, \ .min_val_size = 8, \ @@ -107,41 +106,20 @@ static inline int accounting_pos_cmp(const void *_l, const void *_r) int bch2_accounting_mem_insert(struct bch_fs *, struct bkey_s_c_accounting, bool); void bch2_accounting_mem_gc(struct bch_fs *); -static inline int __bch2_accounting_mem_mod(struct bch_fs *c, struct bkey_s_c_accounting a, bool gc) -{ - struct bch_accounting_mem *acc = &c->accounting; - unsigned idx; - - EBUG_ON(gc && !acc->gc_running); - - while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), - accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { - int ret = bch2_accounting_mem_insert(c, a, gc); - if (ret) - return ret; - } - - struct accounting_mem_entry *e = &acc->k.data[idx]; - - EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); - - for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) - this_cpu_add(e->v[gc][i], a.v->d[i]); - return 0; -} - /* * Update in memory counters so they match the btree update we're doing; called * from transaction commit path */ -static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) +static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc, bool read) { struct bch_fs *c = trans->c; + struct disk_accounting_pos acc_k; + bpos_to_disk_accounting_pos(&acc_k, a.k->p); - if (!gc) { - struct disk_accounting_pos acc_k; - bpos_to_disk_accounting_pos(&acc_k, a.k->p); + if (acc_k.type == BCH_DISK_ACCOUNTING_inum) + return 0; + if (!gc && !read) { switch (acc_k.type) { case BCH_DISK_ACCOUNTING_persistent_reserved: trans->fs_usage_delta.reserved += acc_k.persistent_reserved.nr_replicas * a.v->d[0]; @@ -162,13 +140,31 @@ static inline int bch2_accounting_mem_mod_locked(struct btree_trans *trans, stru } } - return __bch2_accounting_mem_mod(c, a, gc); + struct bch_accounting_mem *acc = &c->accounting; + unsigned idx; + + EBUG_ON(gc && !acc->gc_running); + + while ((idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), + accounting_pos_cmp, &a.k->p)) >= acc->k.nr) { + int ret = bch2_accounting_mem_insert(c, a, gc); + if (ret) + return ret; + } + + struct accounting_mem_entry *e = &acc->k.data[idx]; + + EBUG_ON(bch2_accounting_counters(a.k) != e->nr_counters); + + for (unsigned i = 0; i < bch2_accounting_counters(a.k); i++) + this_cpu_add(e->v[gc][i], a.v->d[i]); + return 0; } static inline int bch2_accounting_mem_add(struct btree_trans *trans, struct bkey_s_c_accounting a, bool gc) { percpu_down_read(&trans->c->mark_lock); - int ret = bch2_accounting_mem_mod_locked(trans, a, gc); + int ret = bch2_accounting_mem_mod_locked(trans, a, gc, false); percpu_up_read(&trans->c->mark_lock); return ret; } diff --git a/fs/bcachefs/disk_accounting_format.h b/fs/bcachefs/disk_accounting_format.h index cba417060b333111c7ee027cd24e32074c4c8e88..7b6e6c97e6aa6a989e593aec2e671cf65749ce9a 100644 --- a/fs/bcachefs/disk_accounting_format.h +++ b/fs/bcachefs/disk_accounting_format.h @@ -103,7 +103,8 @@ static inline bool data_type_is_hidden(enum bch_data_type type) x(compression, 4) \ x(snapshot, 5) \ x(btree, 6) \ - x(rebalance_work, 7) + x(rebalance_work, 7) \ + x(inum, 8) enum disk_accounting_type { #define x(f, nr) BCH_DISK_ACCOUNTING_##f = nr, @@ -124,20 +125,23 @@ struct bch_dev_data_type { __u8 data_type; }; -struct bch_dev_stripe_buckets { - __u8 dev; -}; - struct bch_acct_compression { __u8 type; }; struct bch_acct_snapshot { __u32 id; -}; +} __packed; struct bch_acct_btree { __u32 id; +} __packed; + +struct bch_acct_inum { + __u64 inum; +} __packed; + +struct bch_acct_rebalance_work { }; struct disk_accounting_pos { @@ -149,12 +153,13 @@ struct disk_accounting_pos { struct bch_persistent_reserved persistent_reserved; struct bch_replicas_entry_v1 replicas; struct bch_dev_data_type dev_data_type; - struct bch_dev_stripe_buckets dev_stripe_buckets; struct bch_acct_compression compression; struct bch_acct_snapshot snapshot; struct bch_acct_btree btree; - }; - }; + struct bch_acct_rebalance_work rebalance_work; + struct bch_acct_inum inum; + } __packed; + } __packed; struct bpos _pad; }; }; diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 9b5b5c9a6c638cd63f84fb25b97c9643f1c9affa..141a4c63142f5bceb8b70a8a58973b91000d01a5 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -107,24 +107,23 @@ struct ec_bio { /* Stripes btree keys: */ -int bch2_stripe_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_stripe_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; int ret = 0; bkey_fsck_err_on(bkey_eq(k.k->p, POS_MIN) || - bpos_gt(k.k->p, POS(0, U32_MAX)), c, err, - stripe_pos_bad, + bpos_gt(k.k->p, POS(0, U32_MAX)), + c, stripe_pos_bad, "stripe at bad pos"); - bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), c, err, - stripe_val_size_bad, + bkey_fsck_err_on(bkey_val_u64s(k.k) < stripe_val_u64s(s), + c, stripe_val_size_bad, "incorrect value size (%zu < %u)", bkey_val_u64s(k.k), stripe_val_u64s(s)); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -1809,6 +1808,9 @@ static int new_stripe_alloc_buckets(struct btree_trans *trans, struct ec_stripe_ BUG_ON(v->nr_blocks != h->s->nr_data + h->s->nr_parity); BUG_ON(v->nr_redundant != h->s->nr_parity); + /* * We bypass the sector allocator which normally does this: */ + bitmap_and(devs.d, devs.d, c->rw_devs[BCH_DATA_user].d, BCH_SB_MEMBERS_MAX); + for_each_set_bit(i, h->s->blocks_gotten, v->nr_blocks) { __clear_bit(v->ptrs[i].dev, devs.d); if (i < h->s->nr_data) @@ -2235,6 +2237,23 @@ void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c) mutex_unlock(&c->ec_stripes_heap_lock); } +static void bch2_new_stripe_to_text(struct printbuf *out, struct bch_fs *c, + struct ec_stripe_new *s) +{ + prt_printf(out, "\tidx %llu blocks %u+%u allocated %u ref %u %u %s obs", + s->idx, s->nr_data, s->nr_parity, + bitmap_weight(s->blocks_allocated, s->nr_data), + atomic_read(&s->ref[STRIPE_REF_io]), + atomic_read(&s->ref[STRIPE_REF_stripe]), + bch2_watermarks[s->h->watermark]); + + struct bch_stripe *v = &bkey_i_to_stripe(&s->new_stripe.key)->v; + unsigned i; + for_each_set_bit(i, s->blocks_gotten, v->nr_blocks) + prt_printf(out, " %u", s->blocks[i]); + prt_newline(out); +} + void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) { struct ec_stripe_head *h; @@ -2247,23 +2266,15 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c) bch2_watermarks[h->watermark]); if (h->s) - prt_printf(out, "\tidx %llu blocks %u+%u allocated %u\n", - h->s->idx, h->s->nr_data, h->s->nr_parity, - bitmap_weight(h->s->blocks_allocated, - h->s->nr_data)); + bch2_new_stripe_to_text(out, c, h->s); } mutex_unlock(&c->ec_stripe_head_lock); prt_printf(out, "in flight:\n"); mutex_lock(&c->ec_stripe_new_lock); - list_for_each_entry(s, &c->ec_stripe_new_list, list) { - prt_printf(out, "\tidx %llu blocks %u+%u ref %u %u %s\n", - s->idx, s->nr_data, s->nr_parity, - atomic_read(&s->ref[STRIPE_REF_io]), - atomic_read(&s->ref[STRIPE_REF_stripe]), - bch2_watermarks[s->h->watermark]); - } + list_for_each_entry(s, &c->ec_stripe_new_list, list) + bch2_new_stripe_to_text(out, c, s); mutex_unlock(&c->ec_stripe_new_lock); } diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index 84a23eeb62495e1d4f9d620fff8c654dbbbd9b2a..90962b3c0130572a75e0d333d3ee70c0fb4e29e8 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -8,8 +8,7 @@ enum bch_validate_flags; -int bch2_stripe_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_stripe_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, @@ -17,7 +16,7 @@ int bch2_trigger_stripe(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_stripe ((struct bkey_ops) { \ - .key_invalid = bch2_stripe_invalid, \ + .key_validate = bch2_stripe_validate, \ .val_to_text = bch2_stripe_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_stripe, \ diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index a268af3e52bfd839018992649ef4cca2f5ca9f11..ab5a7adece1042738942a03543af5691d523db91 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -166,6 +166,7 @@ x(0, journal_reclaim_would_deadlock) \ x(EINVAL, fsck) \ x(BCH_ERR_fsck, fsck_fix) \ + x(BCH_ERR_fsck, fsck_delete_bkey) \ x(BCH_ERR_fsck, fsck_ignore) \ x(BCH_ERR_fsck, fsck_errors_not_fixed) \ x(BCH_ERR_fsck, fsck_repair_unimplemented) \ diff --git a/fs/bcachefs/error.c b/fs/bcachefs/error.c index a62b63108820033eaa3753cf111705eebd24b23d..95afa7bf20205c653bc2dd7cde3188d00fe2ce12 100644 --- a/fs/bcachefs/error.c +++ b/fs/bcachefs/error.c @@ -416,6 +416,28 @@ err: return ret; } +int __bch2_bkey_fsck_err(struct bch_fs *c, + struct bkey_s_c k, + enum bch_fsck_flags flags, + enum bch_sb_error_id err, + const char *fmt, ...) +{ + struct printbuf buf = PRINTBUF; + va_list args; + + prt_str(&buf, "invalid bkey "); + bch2_bkey_val_to_text(&buf, c, k); + prt_str(&buf, "\n "); + va_start(args, fmt); + prt_vprintf(&buf, fmt, args); + va_end(args); + prt_str(&buf, ": delete?"); + + int ret = __bch2_fsck_err(c, NULL, flags, err, "%s", buf.buf); + printbuf_exit(&buf); + return ret; +} + void bch2_flush_fsck_errs(struct bch_fs *c) { struct fsck_err_state *s, *n; diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h index 995e6bba9bad8098074ca18ee96404ac4c4847b9..2f1b86978f3660e1b001504b8b403e951c30e0c9 100644 --- a/fs/bcachefs/error.h +++ b/fs/bcachefs/error.h @@ -4,6 +4,7 @@ #include #include +#include "bkey_types.h" #include "sb-errors.h" struct bch_dev; @@ -166,24 +167,30 @@ void bch2_flush_fsck_errs(struct bch_fs *); #define fsck_err_on(cond, c, _err_type, ...) \ __fsck_err_on(cond, c, FSCK_CAN_FIX|FSCK_CAN_IGNORE, _err_type, __VA_ARGS__) -__printf(4, 0) -static inline void bch2_bkey_fsck_err(struct bch_fs *c, - struct printbuf *err_msg, - enum bch_sb_error_id err_type, - const char *fmt, ...) -{ - va_list args; +__printf(5, 6) +int __bch2_bkey_fsck_err(struct bch_fs *, + struct bkey_s_c, + enum bch_fsck_flags, + enum bch_sb_error_id, + const char *, ...); - va_start(args, fmt); - prt_vprintf(err_msg, fmt, args); - va_end(args); -} - -#define bkey_fsck_err(c, _err_msg, _err_type, ...) \ +/* + * for now, bkey fsck errors are always handled by deleting the entire key - + * this will change at some point + */ +#define bkey_fsck_err(c, _err_type, _err_msg, ...) \ do { \ - prt_printf(_err_msg, __VA_ARGS__); \ - bch2_sb_error_count(c, BCH_FSCK_ERR_##_err_type); \ - ret = -BCH_ERR_invalid_bkey; \ + if ((flags & BCH_VALIDATE_silent)) { \ + ret = -BCH_ERR_fsck_delete_bkey; \ + goto fsck_err; \ + } \ + int _ret = __bch2_bkey_fsck_err(c, k, FSCK_CAN_FIX, \ + BCH_FSCK_ERR_##_err_type, \ + _err_msg, ##__VA_ARGS__); \ + if (_ret != -BCH_ERR_fsck_fix && \ + _ret != -BCH_ERR_fsck_ignore) \ + ret = _ret; \ + ret = -BCH_ERR_fsck_delete_bkey; \ goto fsck_err; \ } while (0) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 07973198e35fb72aa20ba1de1a2f327a089745a6..e317df3644a1197db95d43fcc202ec34d1f95984 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -171,17 +171,16 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err, - btree_ptr_val_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, + c, btree_ptr_val_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -192,28 +191,27 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, bch2_bkey_ptrs_to_text(out, c, k); } -int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_btree_ptr_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k); int ret = 0; bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX, - c, err, btree_ptr_v2_val_too_big, + c, btree_ptr_v2_val_too_big, "value too big (%zu > %zu)", bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX); bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p), - c, err, btree_ptr_v2_min_key_bad, + c, btree_ptr_v2_min_key_bad, "min_key > key"); if (flags & BCH_VALIDATE_write) bkey_fsck_err_on(!bp.v->sectors_written, - c, err, btree_ptr_v2_written_0, + c, btree_ptr_v2_written_0, "sectors_written == 0"); - ret = bch2_bkey_ptrs_invalid(c, k, flags, err); + ret = bch2_bkey_ptrs_validate(c, k, flags); fsck_err: return ret; } @@ -399,15 +397,14 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r) /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reservation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); int ret = 0; - bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err, - reservation_key_nr_replicas_invalid, + bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, + c, reservation_key_nr_replicas_invalid, "invalid nr_replicas (%u)", r.v->nr_replicas); fsck_err: return ret; @@ -1020,6 +1017,8 @@ void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struc prt_printf(out, "ptr: %u:%llu:%u gen %u", ptr->dev, b, offset, ptr->gen); + if (ca->mi.durability != 1) + prt_printf(out, " d=%u", ca->mi.durability); if (ptr->cached) prt_str(out, " cached"); if (ptr->unwritten) @@ -1102,14 +1101,12 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, } } - -static int extent_ptr_invalid(struct bch_fs *c, - struct bkey_s_c k, - enum bch_validate_flags flags, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata, - struct printbuf *err) +static int extent_ptr_validate(struct bch_fs *c, + struct bkey_s_c k, + enum bch_validate_flags flags, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { int ret = 0; @@ -1128,28 +1125,27 @@ static int extent_ptr_invalid(struct bch_fs *c, struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); bkey_for_each_ptr(ptrs, ptr2) - bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err, - ptr_to_duplicate_device, + bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, + c, ptr_to_duplicate_device, "multiple pointers to same device (%u)", ptr->dev); - bkey_fsck_err_on(bucket >= nbuckets, c, err, - ptr_after_last_bucket, + bkey_fsck_err_on(bucket >= nbuckets, + c, ptr_after_last_bucket, "pointer past last bucket (%llu > %llu)", bucket, nbuckets); - bkey_fsck_err_on(bucket < first_bucket, c, err, - ptr_before_first_bucket, + bkey_fsck_err_on(bucket < first_bucket, + c, ptr_before_first_bucket, "pointer before first bucket (%llu < %u)", bucket, first_bucket); - bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, c, err, - ptr_spans_multiple_buckets, + bkey_fsck_err_on(bucket_offset + size_ondisk > bucket_size, + c, ptr_spans_multiple_buckets, "pointer spans multiple buckets (%u + %u > %u)", bucket_offset, size_ondisk, bucket_size); fsck_err: return ret; } -int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_bkey_ptrs_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; @@ -1164,25 +1160,24 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, size_ondisk = btree_sectors(c); bkey_extent_entry_for_each(ptrs, entry) { - bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err, - extent_ptrs_invalid_entry, - "invalid extent entry type (got %u, max %u)", - __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); + bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, + c, extent_ptrs_invalid_entry, + "invalid extent entry type (got %u, max %u)", + __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX); bkey_fsck_err_on(bkey_is_btree_ptr(k.k) && - !extent_entry_is_ptr(entry), c, err, - btree_ptr_has_non_ptr, + !extent_entry_is_ptr(entry), + c, btree_ptr_has_non_ptr, "has non ptr field"); switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ret = extent_ptr_invalid(c, k, flags, &entry->ptr, - size_ondisk, false, err); + ret = extent_ptr_validate(c, k, flags, &entry->ptr, size_ondisk, false); if (ret) return ret; - bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err, - ptr_cached_and_erasure_coded, + bkey_fsck_err_on(entry->ptr.cached && have_ec, + c, ptr_cached_and_erasure_coded, "cached, erasure coded ptr"); if (!entry->ptr.unwritten) @@ -1199,44 +1194,50 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, case BCH_EXTENT_ENTRY_crc128: crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err, - ptr_crc_uncompressed_size_too_small, + bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, + c, ptr_crc_uncompressed_size_too_small, "checksum offset + key size > uncompressed size"); - bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err, - ptr_crc_csum_type_unknown, + bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), + c, ptr_crc_csum_type_unknown, "invalid checksum type"); - bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err, - ptr_crc_compression_type_unknown, + bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, + c, ptr_crc_compression_type_unknown, "invalid compression type"); if (bch2_csum_type_is_encryption(crc.csum_type)) { if (nonce == UINT_MAX) nonce = crc.offset + crc.nonce; else if (nonce != crc.offset + crc.nonce) - bkey_fsck_err(c, err, ptr_crc_nonce_mismatch, + bkey_fsck_err(c, ptr_crc_nonce_mismatch, "incorrect nonce"); } - bkey_fsck_err_on(crc_since_last_ptr, c, err, - ptr_crc_redundant, + bkey_fsck_err_on(crc_since_last_ptr, + c, ptr_crc_redundant, "redundant crc entry"); crc_since_last_ptr = true; bkey_fsck_err_on(crc_is_encoded(crc) && (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) && - (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), c, err, - ptr_crc_uncompressed_size_too_big, + (flags & (BCH_VALIDATE_write|BCH_VALIDATE_commit)), + c, ptr_crc_uncompressed_size_too_big, "too large encoded extent"); size_ondisk = crc.compressed_size; break; case BCH_EXTENT_ENTRY_stripe_ptr: - bkey_fsck_err_on(have_ec, c, err, - ptr_stripe_redundant, + bkey_fsck_err_on(have_ec, + c, ptr_stripe_redundant, "redundant stripe entry"); have_ec = true; break; case BCH_EXTENT_ENTRY_rebalance: { + /* + * this shouldn't be a fsck error, for forward + * compatibility; the rebalance code should just refetch + * the compression opt if it's unknown + */ +#if 0 const struct bch_extent_rebalance *r = &entry->rebalance; if (!bch2_compression_opt_valid(r->compression)) { @@ -1245,28 +1246,29 @@ int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k, opt.type, opt.level); return -BCH_ERR_invalid_bkey; } +#endif break; } } } - bkey_fsck_err_on(!nr_ptrs, c, err, - extent_ptrs_no_ptrs, + bkey_fsck_err_on(!nr_ptrs, + c, extent_ptrs_no_ptrs, "no ptrs"); - bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err, - extent_ptrs_too_many_ptrs, + bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, + c, extent_ptrs_too_many_ptrs, "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX); - bkey_fsck_err_on(have_written && have_unwritten, c, err, - extent_ptrs_written_and_unwritten, + bkey_fsck_err_on(have_written && have_unwritten, + c, extent_ptrs_written_and_unwritten, "extent with unwritten and written ptrs"); - bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err, - extent_ptrs_unwritten, + bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, + c, extent_ptrs_unwritten, "has unwritten ptrs"); - bkey_fsck_err_on(crc_since_last_ptr, c, err, - extent_ptrs_redundant_crc, + bkey_fsck_err_on(crc_since_last_ptr, + c, extent_ptrs_redundant_crc, "redundant crc entry"); - bkey_fsck_err_on(have_ec, c, err, - extent_ptrs_redundant_stripe, + bkey_fsck_err_on(have_ec, + c, extent_ptrs_redundant_stripe, "redundant stripe entry"); fsck_err: return ret; @@ -1377,6 +1379,45 @@ bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k) return r != NULL; } +static u64 __bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k, + unsigned target, unsigned compression) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + u64 sectors = 0; + + if (compression) { + unsigned compression_type = bch2_compression_opt_to_type(compression); + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible || + p.ptr.unwritten) { + sectors = 0; + goto incompressible; + } + + if (!p.ptr.cached && p.crc.compression_type != compression_type) + sectors += p.crc.compressed_size; + } + } +incompressible: + if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) { + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) + if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, target)) + sectors += p.crc.compressed_size; + } + + return sectors; +} + +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k) +{ + const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k); + + return r ? __bch2_bkey_sectors_need_rebalance(c, k, r->target, r->compression) : 0; +} + int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k, struct bch_io_opts *opts) { diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index facdb8a86eec849c90899d5cc33e910d194001f6..709dd83183be1fe5961da72681861390dd49dc8e 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -409,26 +409,26 @@ int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, /* KEY_TYPE_btree_ptr: */ -int bch2_btree_ptr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_btree_ptr_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_btree_ptr_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_btree_ptr_v2_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, int, struct bkey_s); #define bch2_bkey_ops_btree_ptr ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ + .key_validate = bch2_btree_ptr_validate, \ .val_to_text = bch2_btree_ptr_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_extent, \ }) #define bch2_bkey_ops_btree_ptr_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_v2_invalid, \ + .key_validate = bch2_btree_ptr_v2_validate, \ .val_to_text = bch2_btree_ptr_v2_to_text, \ .swab = bch2_ptr_swab, \ .compat = bch2_btree_ptr_v2_compat, \ @@ -441,7 +441,7 @@ void bch2_btree_ptr_v2_compat(enum btree_id, unsigned, unsigned, bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_extent ((struct bkey_ops) { \ - .key_invalid = bch2_bkey_ptrs_invalid, \ + .key_validate = bch2_bkey_ptrs_validate, \ .val_to_text = bch2_bkey_ptrs_to_text, \ .swab = bch2_ptr_swab, \ .key_normalize = bch2_extent_normalize, \ @@ -451,13 +451,13 @@ bool bch2_extent_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); /* KEY_TYPE_reservation: */ -int bch2_reservation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_reservation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reservation_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); #define bch2_bkey_ops_reservation ((struct bkey_ops) { \ - .key_invalid = bch2_reservation_invalid, \ + .key_validate = bch2_reservation_validate, \ .val_to_text = bch2_reservation_to_text, \ .key_merge = bch2_reservation_merge, \ .trigger = bch2_trigger_reservation, \ @@ -683,8 +683,8 @@ bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *, const struct bch_extent_ptr *); void bch2_bkey_ptrs_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_bkey_ptrs_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_bkey_ptrs_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_ptr_swab(struct bkey_s); @@ -692,6 +692,7 @@ const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c); unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *, struct bkey_s_c, unsigned, unsigned); bool bch2_bkey_needs_rebalance(struct bch_fs *, struct bkey_s_c); +u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *, struct bkey_s_c); int bch2_bkey_set_needs_rebalance(struct bch_fs *, struct bkey_i *, struct bch_io_opts *); diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c index cc33d763f7221d21c39bed6b66f4ccb9f95e5cd5..184d0385167689acfef82bc648611f2b8919665d 100644 --- a/fs/bcachefs/fs-io-buffered.c +++ b/fs/bcachefs/fs-io-buffered.c @@ -534,7 +534,7 @@ do_io: if (f_sectors > w->tmp_sectors) { kfree(w->tmp); - w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), __GFP_NOFAIL); + w->tmp = kcalloc(f_sectors, sizeof(struct bch_folio_sector), GFP_NOFS|__GFP_NOFAIL); w->tmp_sectors = f_sectors; } diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c index aea8132d2c40e585eabd923a9138035f87827455..99c7fe987c74ffa19fa9abe31a479001e9322639 100644 --- a/fs/bcachefs/fs-ioctl.c +++ b/fs/bcachefs/fs-ioctl.c @@ -328,9 +328,8 @@ static int bch2_ioc_setlabel(struct bch_fs *c, mutex_lock(&c->sb_lock); strscpy(c->disk_sb.sb->label, label, BCH_SB_LABEL_SIZE); - mutex_unlock(&c->sb_lock); - ret = bch2_write_super(c); + mutex_unlock(&c->sb_lock); mnt_drop_write_file(file); return ret; diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 3a5f49affa0a96a185df20084ffd7589fc6b945d..94c392abef65cdda64624c9eeec5c56875611e1e 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -193,7 +193,7 @@ static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_ino * only insert fully created inodes in the inode hash table. But * discard_new_inode() expects it to be set... */ - inode->v.i_flags |= I_NEW; + inode->v.i_state |= I_NEW; /* * We don't want bch2_evict_inode() to delete the inode on disk, * we just raced and had another inode in cache. Normally new @@ -1199,7 +1199,7 @@ static const struct inode_operations bch_file_inode_operations = { .fiemap = bch2_fiemap, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1219,7 +1219,7 @@ static const struct inode_operations bch_dir_inode_operations = { .tmpfile = bch2_tmpfile, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1241,7 +1241,7 @@ static const struct inode_operations bch_symlink_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; @@ -1251,7 +1251,7 @@ static const struct inode_operations bch_special_inode_operations = { .setattr = bch2_setattr, .listxattr = bch2_xattr_list, #ifdef CONFIG_BCACHEFS_POSIX_ACL - .get_acl = bch2_get_acl, + .get_inode_acl = bch2_get_acl, .set_acl = bch2_set_acl, #endif }; diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 9138944c5ae6986d9ffec1f94ffbb42b419d94bc..83bd31b44aad0bf8107b46ccaef46b2d0c468b04 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -2006,7 +2006,6 @@ static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter * if (ret) { bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); ret = -BCH_ERR_fsck_repair_unimplemented; - ret = 0; goto err; } @@ -2216,6 +2215,8 @@ int bch2_check_xattrs(struct bch_fs *c) NULL, NULL, BCH_TRANS_COMMIT_no_enospc, check_xattr(trans, &iter, k, &hash_info, &inode))); + + inode_walker_exit(&inode); bch_err_fn(c, ret); return ret; } @@ -2469,8 +2470,7 @@ static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c ino : bch2_inode_unpack(inode_k, &inode); if (ret) { /* Should have been caught in dirents pass */ - if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) - bch_err(c, "error looking up parent directory: %i", ret); + bch_err_msg(c, ret, "error looking up parent directory"); break; } diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 1e20020eadd1f5cba5ac73caf13fb540243e68f6..2be6be33afa3e0d068bf514ee7890187fb70bf62 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -434,100 +434,98 @@ struct bkey_i *bch2_inode_to_v3(struct btree_trans *trans, struct bkey_i *k) return &inode_p->inode.k_i; } -static int __bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, struct printbuf *err) +static int __bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bch_inode_unpacked unpacked; int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); - bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, c, err, - inode_pos_blockdev_range, + bkey_fsck_err_on(k.k->p.offset < BLOCKDEV_INODE_MAX, + c, inode_pos_blockdev_range, "fs inode in blockdev range"); - bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), c, err, - inode_unpack_error, + bkey_fsck_err_on(bch2_inode_unpack(k, &unpacked), + c, inode_unpack_error, "invalid variable length fields"); - bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, c, err, - inode_checksum_type_invalid, + bkey_fsck_err_on(unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1, + c, inode_checksum_type_invalid, "invalid data checksum type (%u >= %u", unpacked.bi_data_checksum, BCH_CSUM_OPT_NR + 1); bkey_fsck_err_on(unpacked.bi_compression && - !bch2_compression_opt_valid(unpacked.bi_compression - 1), c, err, - inode_compression_type_invalid, + !bch2_compression_opt_valid(unpacked.bi_compression - 1), + c, inode_compression_type_invalid, "invalid compression opt %u", unpacked.bi_compression - 1); bkey_fsck_err_on((unpacked.bi_flags & BCH_INODE_unlinked) && - unpacked.bi_nlink != 0, c, err, - inode_unlinked_but_nlink_nonzero, + unpacked.bi_nlink != 0, + c, inode_unlinked_but_nlink_nonzero, "flagged as unlinked but bi_nlink != 0"); - bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), c, err, - inode_subvol_root_but_not_dir, + bkey_fsck_err_on(unpacked.bi_subvol && !S_ISDIR(unpacked.bi_mode), + c, inode_subvol_root_but_not_dir, "subvolume root but not a directory"); fsck_err: return ret; } -int bch2_inode_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); int ret = 0; - bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODE_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v2_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v2_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k); int ret = 0; - bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv2_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv2_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } -int bch2_inode_v3_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_v3_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_inode_v3 inode = bkey_s_c_to_inode_v3(k); int ret = 0; bkey_fsck_err_on(INODEv3_FIELDS_START(inode.v) < INODEv3_FIELDS_START_INITIAL || - INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), c, err, - inode_v3_fields_start_bad, + INODEv3_FIELDS_START(inode.v) > bkey_val_u64s(inode.k), + c, inode_v3_fields_start_bad, "invalid fields_start (got %llu, min %u max %zu)", INODEv3_FIELDS_START(inode.v), INODEv3_FIELDS_START_INITIAL, bkey_val_u64s(inode.k)); - bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, c, err, - inode_str_hash_invalid, + bkey_fsck_err_on(INODEv3_STR_HASH(inode.v) >= BCH_STR_HASH_NR, + c, inode_str_hash_invalid, "invalid str hash type (%llu >= %u)", INODEv3_STR_HASH(inode.v), BCH_STR_HASH_NR); - ret = __bch2_inode_invalid(c, k, err); + ret = __bch2_inode_validate(c, k, flags); fsck_err: return ret; } @@ -625,14 +623,13 @@ int bch2_trigger_inode(struct btree_trans *trans, return 0; } -int bch2_inode_generation_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_inode_generation_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode, c, err, - inode_pos_inode_nonzero, + bkey_fsck_err_on(k.k->p.inode, + c, inode_pos_inode_nonzero, "nonzero k.p.inode"); fsck_err: return ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index da0e4a7450990f80bef2e61ffeccb31921d56a95..f1fcb4c58039a5f6ee0a9185a6d3fd8065944399 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,12 +9,12 @@ enum bch_validate_flags; extern const char * const bch2_inode_opts[]; -int bch2_inode_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v2_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -int bch2_inode_v3_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v2_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); +int bch2_inode_v3_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, @@ -22,21 +22,21 @@ int bch2_trigger_inode(struct btree_trans *, enum btree_id, unsigned, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_inode ((struct bkey_ops) { \ - .key_invalid = bch2_inode_invalid, \ + .key_validate = bch2_inode_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 16, \ }) #define bch2_bkey_ops_inode_v2 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v2_invalid, \ + .key_validate = bch2_inode_v2_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 32, \ }) #define bch2_bkey_ops_inode_v3 ((struct bkey_ops) { \ - .key_invalid = bch2_inode_v3_invalid, \ + .key_validate = bch2_inode_v3_validate, \ .val_to_text = bch2_inode_to_text, \ .trigger = bch2_trigger_inode, \ .min_val_size = 48, \ @@ -49,12 +49,12 @@ static inline bool bkey_is_inode(const struct bkey *k) k->type == KEY_TYPE_inode_v3; } -int bch2_inode_generation_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_inode_generation_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_inode_generation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_inode_generation ((struct bkey_ops) { \ - .key_invalid = bch2_inode_generation_invalid, \ + .key_validate = bch2_inode_generation_validate, \ .val_to_text = bch2_inode_generation_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/io_misc.c b/fs/bcachefs/io_misc.c index 2cf6297756f8e1eab9b87fb3fbe806a0604510a5..177ed331c00b1d4a05d5eb523e430732b251dd8d 100644 --- a/fs/bcachefs/io_misc.c +++ b/fs/bcachefs/io_misc.c @@ -126,11 +126,7 @@ err_noprint: if (closure_nr_remaining(&cl) != 1) { bch2_trans_unlock_long(trans); - - if (closure_sync_timeout(&cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&cl); - } + bch2_wait_on_allocator(c, &cl); } return ret; diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 4531c9ab3e12675738bde6af7d08351b073ebb45..7ee3b75480df307dffc0921bf31df71e0a1757c1 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -406,6 +406,7 @@ static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio bch2_trans_iter_init(trans, &iter, rbio->data_btree, rbio->read_pos, BTREE_ITER_slots); retry: + bch2_trans_begin(trans); rbio->bio.bi_status = 0; k = bch2_btree_iter_peek_slot(&iter); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d31c8d006d979138821a0223dbf6aa9bf0645bd1..1d4761d150023af0c1a9dca31b765c00683e901a 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -1503,10 +1503,7 @@ err: if ((op->flags & BCH_WRITE_SYNC) || (!(op->flags & BCH_WRITE_SUBMITTED) && !(op->flags & BCH_WRITE_IN_WORKER))) { - if (closure_sync_timeout(&op->cl, HZ * 10)) { - bch2_print_allocator_stuck(c); - closure_sync(&op->cl); - } + bch2_wait_on_allocator(c, &op->cl); __bch2_write_index(op); diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index 649e3a01608af408d70f6b2f498f01255589f70c..f5f7db50ca310c3a5d8fc39f25925f516230bc6d 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -1260,7 +1260,7 @@ int bch2_fs_journal_start(struct journal *j, u64 cur_seq) } if (!had_entries) - j->last_empty_seq = cur_seq; + j->last_empty_seq = cur_seq - 1; /* to match j->seq */ spin_lock(&j->lock); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7a833a3f1c63a11489c0cab0014d29e297c60436..7664b68e6a15ee0c54aa023fed8d15c6980365da 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -332,7 +332,6 @@ static int journal_validate_key(struct bch_fs *c, { int write = flags & BCH_VALIDATE_write; void *next = vstruct_next(entry); - struct printbuf buf = PRINTBUF; int ret = 0; if (journal_entry_err_on(!k->k.u64s, @@ -368,34 +367,21 @@ static int journal_validate_key(struct bch_fs *c, bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); - if (bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf)) { - printbuf_reset(&buf); - journal_entry_err_msg(&buf, version, jset, entry); - prt_newline(&buf); - printbuf_indent_add(&buf, 2); - - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k)); - prt_newline(&buf); - bch2_bkey_invalid(c, bkey_i_to_s_c(k), - __btree_node_type(level, btree_id), write, &buf); - - mustfix_fsck_err(c, journal_entry_bkey_invalid, - "%s", buf.buf); - + ret = bch2_bkey_validate(c, bkey_i_to_s_c(k), + __btree_node_type(level, btree_id), write); + if (ret == -BCH_ERR_fsck_delete_bkey) { le16_add_cpu(&entry->u64s, -((u16) k->k.u64s)); memmove(k, bkey_next(k), next - (void *) bkey_next(k)); journal_entry_null_range(vstruct_next(entry), next); - - printbuf_exit(&buf); return FSCK_DELETED_KEY; } + if (ret) + goto fsck_err; if (write) bch2_bkey_compat(level, btree_id, version, big_endian, write, NULL, bkey_to_packed(k)); fsck_err: - printbuf_exit(&buf); return ret; } diff --git a/fs/bcachefs/journal_sb.c b/fs/bcachefs/journal_sb.c index db80e506e3abee5f2776d02cfe2508b90693b8b5..62b910f2fb27cd88af8867845c404ead988e4c16 100644 --- a/fs/bcachefs/journal_sb.c +++ b/fs/bcachefs/journal_sb.c @@ -104,6 +104,7 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f struct bch_sb_field_journal_v2 *journal = field_to_type(f, journal_v2); struct bch_member m = bch2_sb_member_get(sb, sb->dev_idx); int ret = -BCH_ERR_invalid_sb_journal; + u64 sum = 0; unsigned nr; unsigned i; struct u64_range *b; @@ -119,6 +120,15 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f for (i = 0; i < nr; i++) { b[i].start = le64_to_cpu(journal->d[i].start); b[i].end = b[i].start + le64_to_cpu(journal->d[i].nr); + + if (b[i].end <= b[i].start) { + prt_printf(err, "journal buckets entry with bad nr: %llu+%llu", + le64_to_cpu(journal->d[i].start), + le64_to_cpu(journal->d[i].nr)); + goto err; + } + + sum += le64_to_cpu(journal->d[i].nr); } sort(b, nr, sizeof(*b), u64_range_cmp, NULL); @@ -148,6 +158,11 @@ static int bch2_sb_journal_v2_validate(struct bch_sb *sb, struct bch_sb_field *f } } + if (sum > UINT_MAX) { + prt_printf(err, "too many journal buckets: %llu > %u", sum, UINT_MAX); + goto err; + } + ret = 0; err: kfree(b); diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index 83b1586cb3710bc5f4f7b007d4e2d52103beb08f..96f2f4f8c39787e79fc5f831d9a12edd4f0dae95 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -10,14 +10,13 @@ #include "recovery.h" /* KEY_TYPE_lru is obsolete: */ -int bch2_lru_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_lru_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(!lru_pos_time(k.k->p), c, err, - lru_entry_at_time_0, + bkey_fsck_err_on(!lru_pos_time(k.k->p), + c, lru_entry_at_time_0, "lru entry at time=0"); fsck_err: return ret; diff --git a/fs/bcachefs/lru.h b/fs/bcachefs/lru.h index 5bd8974a7f11e52fb70362cf60745ffcc7e342b7..e6a7d8241bb807bb658f79681f7a8fef114952cc 100644 --- a/fs/bcachefs/lru.h +++ b/fs/bcachefs/lru.h @@ -33,14 +33,13 @@ static inline enum bch_lru_type lru_type(struct bkey_s_c l) return BCH_LRU_read; } -int bch2_lru_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_lru_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_lru_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_lru_pos_to_text(struct printbuf *, struct bpos); #define bch2_bkey_ops_lru ((struct bkey_ops) { \ - .key_invalid = bch2_lru_invalid, \ + .key_validate = bch2_lru_validate, \ .val_to_text = bch2_lru_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index deef4f024d20bdc8acba5f66ac01b7447b16259f..d86565bf07c8c563cb5651d2ea7e585a3f2ff7df 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -383,7 +383,7 @@ static int bch2_copygc_thread(void *arg) if (min_member_capacity == U64_MAX) min_member_capacity = 128 * 2048; - bch2_trans_unlock_long(ctxt.trans); + move_buckets_wait(&ctxt, buckets, true); bch2_kthread_io_clock_wait(clock, last + (min_member_capacity >> 6), MAX_SCHEDULE_TIMEOUT); } diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 60b93018501f9814c3504ab43bf24d7d82e5c42f..cda1725702eaf7aafa74258cdfbb952583058b23 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -391,6 +391,11 @@ enum fsck_err_opts { OPT_BOOL(), \ BCH_SB_JOURNAL_TRANSACTION_NAMES, true, \ NULL, "Log transaction function names in journal") \ + x(allocator_stuck_timeout, u16, \ + OPT_FS|OPT_FORMAT|OPT_MOUNT|OPT_RUNTIME, \ + OPT_UINT(0, U16_MAX), \ + BCH_SB_ALLOCATOR_STUCK_TIMEOUT, 30, \ + NULL, "Default timeout in seconds for stuck allocator messages")\ x(noexcl, u8, \ OPT_FS|OPT_MOUNT, \ OPT_BOOL(), \ diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index a0cca8b70e0ae173e8f10496c1d679028969743f..c32a05e252e2abcbcc073db148741b531d11dfe8 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -59,13 +59,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { .to_text = bch2_sb_quota_to_text, }; -int bch2_quota_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_quota_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; - bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, c, err, - quota_type_invalid, + bkey_fsck_err_on(k.k->p.inode >= QTYP_NR, + c, quota_type_invalid, "invalid quota type (%llu >= %u)", k.k->p.inode, QTYP_NR); fsck_err: diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h index 02d37a332218a80f5688a0eb36061db32a14e4f1..a62abcc5332ad399c2022eb155e41106539c7ea8 100644 --- a/fs/bcachefs/quota.h +++ b/fs/bcachefs/quota.h @@ -8,12 +8,11 @@ enum bch_validate_flags; extern const struct bch_sb_field_ops bch_sb_field_ops_quota; -int bch2_quota_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_quota_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_quota ((struct bkey_ops) { \ - .key_invalid = bch2_quota_invalid, \ + .key_validate = bch2_quota_validate, \ .val_to_text = bch2_quota_to_text, \ .min_val_size = 32, \ }) diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index d89eb43c5ce95138ef527bb61de682d8687e2b14..36de1c6fe8c36e4ebc4c14af1eaf6f44a727e4a6 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -241,7 +241,13 @@ static int journal_sort_seq_cmp(const void *_l, const void *_r) const struct journal_key *l = *((const struct journal_key **)_l); const struct journal_key *r = *((const struct journal_key **)_r); - return cmp_int(l->journal_seq, r->journal_seq); + /* + * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last + * + * journal_seq == 0 means that the key comes from early repair, and + * should be inserted last so as to avoid overflowing the journal + */ + return cmp_int(l->journal_seq - 1, r->journal_seq - 1); } int bch2_journal_replay(struct bch_fs *c) @@ -322,6 +328,7 @@ int bch2_journal_replay(struct bch_fs *c) } } + bch2_trans_unlock_long(trans); /* * Now, replay any remaining keys in the order in which they appear in * the journal, unpinning those journal entries as we go: diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c index 5f92715e1525a233b36719a2ed7c4ba502c9fc75..e59c0abb47723bd2eb2b55a09f365061d0063586 100644 --- a/fs/bcachefs/reflink.c +++ b/fs/bcachefs/reflink.c @@ -29,15 +29,14 @@ static inline unsigned bkey_type_to_indirect(const struct bkey *k) /* reflink pointers */ -int bch2_reflink_p_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_p_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k); int ret = 0; bkey_fsck_err_on(le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad), - c, err, reflink_p_front_pad_bad, + c, reflink_p_front_pad_bad, "idx < front_pad (%llu < %u)", le64_to_cpu(p.v->idx), le32_to_cpu(p.v->front_pad)); fsck_err: @@ -256,11 +255,10 @@ int bch2_trigger_reflink_p(struct btree_trans *trans, /* indirect extents */ -int bch2_reflink_v_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_reflink_v_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { - return bch2_bkey_ptrs_invalid(c, k, flags, err); + return bch2_bkey_ptrs_validate(c, k, flags); } void bch2_reflink_v_to_text(struct printbuf *out, struct bch_fs *c, @@ -311,9 +309,8 @@ int bch2_trigger_reflink_v(struct btree_trans *trans, /* indirect inline data */ -int bch2_indirect_inline_data_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_indirect_inline_data_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { return 0; } diff --git a/fs/bcachefs/reflink.h b/fs/bcachefs/reflink.h index e894f3a2c67ac6225fc2edecc8de0d538520aa4e..51afe11d8ed65b3159ad1a39aebe879970d36647 100644 --- a/fs/bcachefs/reflink.h +++ b/fs/bcachefs/reflink.h @@ -4,41 +4,37 @@ enum bch_validate_flags; -int bch2_reflink_p_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_p_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_p_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); bool bch2_reflink_p_merge(struct bch_fs *, struct bkey_s, struct bkey_s_c); int bch2_trigger_reflink_p(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_p ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_p_invalid, \ + .key_validate = bch2_reflink_p_validate, \ .val_to_text = bch2_reflink_p_to_text, \ .key_merge = bch2_reflink_p_merge, \ .trigger = bch2_trigger_reflink_p, \ .min_val_size = 16, \ }) -int bch2_reflink_v_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); -void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); +int bch2_reflink_v_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); +void bch2_reflink_v_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_reflink_v(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_reflink_v ((struct bkey_ops) { \ - .key_invalid = bch2_reflink_v_invalid, \ + .key_validate = bch2_reflink_v_validate, \ .val_to_text = bch2_reflink_v_to_text, \ .swab = bch2_ptr_swab, \ .trigger = bch2_trigger_reflink_v, \ .min_val_size = 8, \ }) -int bch2_indirect_inline_data_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_indirect_inline_data_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); void bch2_indirect_inline_data_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_trigger_indirect_inline_data(struct btree_trans *, @@ -47,7 +43,7 @@ int bch2_trigger_indirect_inline_data(struct btree_trans *, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_indirect_inline_data ((struct bkey_ops) { \ - .key_invalid = bch2_indirect_inline_data_invalid, \ + .key_validate = bch2_indirect_inline_data_validate, \ .val_to_text = bch2_indirect_inline_data_to_text, \ .trigger = bch2_trigger_indirect_inline_data, \ .min_val_size = 8, \ diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index 10c96cb2047ae1e71a9d3a36ad9b13e6858f5361..12b1d28b7eb499b5a30c1c990cd44dfd7c8d98e4 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -24,7 +24,6 @@ static int bch2_memcmp(const void *l, const void *r, const void *priv) static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) { #ifdef CONFIG_BCACHEFS_DEBUG - BUG_ON(e->data_type >= BCH_DATA_NR); BUG_ON(!e->nr_devs); BUG_ON(e->nr_required > 1 && e->nr_required >= e->nr_devs); @@ -452,7 +451,8 @@ retry: .type = BCH_DISK_ACCOUNTING_replicas, }; - memcpy(&k.replicas, e, replicas_entry_bytes(e)); + unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), + "embedded variable length struct"); struct bpos p = disk_accounting_pos_to_bpos(&k); diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c index dfbbd33c8731b64ad62803f37f93ddd79e15eb54..c7e4cdd3f6a521d01a1705c07553073a38076503 100644 --- a/fs/bcachefs/sb-downgrade.c +++ b/fs/bcachefs/sb-downgrade.c @@ -61,6 +61,23 @@ BCH_FSCK_ERR_dev_usage_buckets_wrong, \ BCH_FSCK_ERR_dev_usage_sectors_wrong, \ BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_bkey_version_in_future, \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_accounting_mismatch, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ + BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ + BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ + BCH_FSCK_ERR_accounting_key_junk_at_end) \ + x(disk_accounting_inum, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) \ + x(rebalance_work_acct_fix, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ BCH_FSCK_ERR_accounting_mismatch) #define DOWNGRADE_TABLE() \ @@ -79,7 +96,25 @@ BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ BCH_FSCK_ERR_fs_usage_replicas_wrong, \ - BCH_FSCK_ERR_bkey_version_in_future) + BCH_FSCK_ERR_bkey_version_in_future) \ + x(disk_accounting_v3, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_dev_usage_buckets_wrong, \ + BCH_FSCK_ERR_dev_usage_sectors_wrong, \ + BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ + BCH_FSCK_ERR_fs_usage_hidden_wrong, \ + BCH_FSCK_ERR_fs_usage_btree_wrong, \ + BCH_FSCK_ERR_fs_usage_data_wrong, \ + BCH_FSCK_ERR_fs_usage_cached_wrong, \ + BCH_FSCK_ERR_fs_usage_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ + BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ + BCH_FSCK_ERR_fs_usage_replicas_wrong, \ + BCH_FSCK_ERR_accounting_replicas_not_marked, \ + BCH_FSCK_ERR_bkey_version_in_future) \ + x(rebalance_work_acct_fix, \ + BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ + BCH_FSCK_ERR_accounting_mismatch) struct upgrade_downgrade_entry { u64 recovery_passes; diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h index d1b2f2aa397a7111ac63ac3b47fbc1cfba096f46..d3a498617303e680ea30dda3400bdc672ea205ca 100644 --- a/fs/bcachefs/sb-errors_format.h +++ b/fs/bcachefs/sb-errors_format.h @@ -287,7 +287,11 @@ enum bch_fsck_flags { x(accounting_replicas_not_marked, 273, 0) \ x(invalid_btree_id, 274, 0) \ x(alloc_key_io_time_bad, 275, 0) \ - x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) + x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \ + x(accounting_key_junk_at_end, 277, 0) \ + x(accounting_key_replicas_nr_devs_0, 278, 0) \ + x(accounting_key_replicas_nr_required_bad, 279, 0) \ + x(accounting_key_replicas_devs_unsorted, 280, 0) \ enum bch_sb_error_id { #define x(t, n, ...) BCH_FSCK_ERR_##t = n, diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c index 96744b1a76f5bea192c9d535728714444b4f6918..8b18a9b483a4df8a608b2c366586857e74a3ac72 100644 --- a/fs/bcachefs/snapshot.c +++ b/fs/bcachefs/snapshot.c @@ -31,15 +31,14 @@ void bch2_snapshot_tree_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(t.v->root_snapshot)); } -int bch2_snapshot_tree_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_tree_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_tree_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_tree_pos_bad, "bad pos"); fsck_err: return ret; @@ -225,55 +224,54 @@ void bch2_snapshot_to_text(struct printbuf *out, struct bch_fs *c, le32_to_cpu(s.v->skip[2])); } -int bch2_snapshot_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_snapshot_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_snapshot s; u32 i, id; int ret = 0; bkey_fsck_err_on(bkey_gt(k.k->p, POS(0, U32_MAX)) || - bkey_lt(k.k->p, POS(0, 1)), c, err, - snapshot_pos_bad, + bkey_lt(k.k->p, POS(0, 1)), + c, snapshot_pos_bad, "bad pos"); s = bkey_s_c_to_snapshot(k); id = le32_to_cpu(s.v->parent); - bkey_fsck_err_on(id && id <= k.k->p.offset, c, err, - snapshot_parent_bad, + bkey_fsck_err_on(id && id <= k.k->p.offset, + c, snapshot_parent_bad, "bad parent node (%u <= %llu)", id, k.k->p.offset); - bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), c, err, - snapshot_children_not_normalized, + bkey_fsck_err_on(le32_to_cpu(s.v->children[0]) < le32_to_cpu(s.v->children[1]), + c, snapshot_children_not_normalized, "children not normalized"); - bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], c, err, - snapshot_child_duplicate, + bkey_fsck_err_on(s.v->children[0] && s.v->children[0] == s.v->children[1], + c, snapshot_child_duplicate, "duplicate child nodes"); for (i = 0; i < 2; i++) { id = le32_to_cpu(s.v->children[i]); - bkey_fsck_err_on(id >= k.k->p.offset, c, err, - snapshot_child_bad, + bkey_fsck_err_on(id >= k.k->p.offset, + c, snapshot_child_bad, "bad child node (%u >= %llu)", id, k.k->p.offset); } if (bkey_val_bytes(k.k) > offsetof(struct bch_snapshot, skip)) { bkey_fsck_err_on(le32_to_cpu(s.v->skip[0]) > le32_to_cpu(s.v->skip[1]) || - le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), c, err, - snapshot_skiplist_not_normalized, + le32_to_cpu(s.v->skip[1]) > le32_to_cpu(s.v->skip[2]), + c, snapshot_skiplist_not_normalized, "skiplist not normalized"); for (i = 0; i < ARRAY_SIZE(s.v->skip); i++) { id = le32_to_cpu(s.v->skip[i]); - bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), c, err, - snapshot_skiplist_bad, + bkey_fsck_err_on(id && id < le32_to_cpu(s.v->parent), + c, snapshot_skiplist_bad, "bad skiplist node %u", id); } } diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h index 31b0ee03e96288e8dd525fab7f65e9eb56020990..eb5ef64221d6e11b9adb47fc0f3ddabedb729914 100644 --- a/fs/bcachefs/snapshot.h +++ b/fs/bcachefs/snapshot.h @@ -5,11 +5,11 @@ enum bch_validate_flags; void bch2_snapshot_tree_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_tree_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_tree_validate(struct bch_fs *, struct bkey_s_c, + enum bch_validate_flags); #define bch2_bkey_ops_snapshot_tree ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_tree_invalid, \ + .key_validate = bch2_snapshot_tree_validate, \ .val_to_text = bch2_snapshot_tree_to_text, \ .min_val_size = 8, \ }) @@ -19,14 +19,13 @@ struct bkey_i_snapshot_tree *__bch2_snapshot_tree_create(struct btree_trans *); int bch2_snapshot_tree_lookup(struct btree_trans *, u32, struct bch_snapshot_tree *); void bch2_snapshot_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -int bch2_snapshot_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_snapshot_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); int bch2_mark_snapshot(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_snapshot ((struct bkey_ops) { \ - .key_invalid = bch2_snapshot_invalid, \ + .key_validate = bch2_snapshot_validate, \ .val_to_text = bch2_snapshot_to_text, \ .trigger = bch2_mark_snapshot, \ .min_val_size = 24, \ diff --git a/fs/bcachefs/subvolume.c b/fs/bcachefs/subvolume.c index f56720b558626a8eb8cf71823942fee0a4654993..dbe834cb349f4a8e6a1e72d24fafab3acf1186d4 100644 --- a/fs/bcachefs/subvolume.c +++ b/fs/bcachefs/subvolume.c @@ -207,23 +207,23 @@ int bch2_check_subvol_children(struct bch_fs *c) /* Subvolumes: */ -int bch2_subvolume_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, struct printbuf *err) +int bch2_subvolume_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_subvolume subvol = bkey_s_c_to_subvolume(k); int ret = 0; bkey_fsck_err_on(bkey_lt(k.k->p, SUBVOL_POS_MIN) || - bkey_gt(k.k->p, SUBVOL_POS_MAX), c, err, - subvol_pos_bad, + bkey_gt(k.k->p, SUBVOL_POS_MAX), + c, subvol_pos_bad, "invalid pos"); - bkey_fsck_err_on(!subvol.v->snapshot, c, err, - subvol_snapshot_bad, + bkey_fsck_err_on(!subvol.v->snapshot, + c, subvol_snapshot_bad, "invalid snapshot"); - bkey_fsck_err_on(!subvol.v->inode, c, err, - subvol_inode_bad, + bkey_fsck_err_on(!subvol.v->inode, + c, subvol_inode_bad, "invalid inode"); fsck_err: return ret; diff --git a/fs/bcachefs/subvolume.h b/fs/bcachefs/subvolume.h index afa5e871efb252db7304d62287f1fdc0bbfc89f6..a8299ba2cab2cefa23450e7e8c277a32cc77383f 100644 --- a/fs/bcachefs/subvolume.h +++ b/fs/bcachefs/subvolume.h @@ -10,15 +10,14 @@ enum bch_validate_flags; int bch2_check_subvols(struct bch_fs *); int bch2_check_subvol_children(struct bch_fs *); -int bch2_subvolume_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_subvolume_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_subvolume_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); int bch2_subvolume_trigger(struct btree_trans *, enum btree_id, unsigned, struct bkey_s_c, struct bkey_s, enum btree_iter_update_trigger_flags); #define bch2_bkey_ops_subvolume ((struct bkey_ops) { \ - .key_invalid = bch2_subvolume_invalid, \ + .key_validate = bch2_subvolume_validate, \ .val_to_text = bch2_subvolume_to_text, \ .trigger = bch2_subvolume_trigger, \ .min_val_size = 16, \ diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index 8bc8198327904454da03e34a9da8931d7f92d5f0..c8c2ccbdfbb5891faa1de7e2c5ca2a50724a93ec 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -414,6 +414,10 @@ static int bch2_sb_validate(struct bch_sb_handle *disk_sb, if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb)) SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version)); + + if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 && + !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb)) + SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30); } for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) { diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0455a1001fec2bbc7675f8a4bfbcd8459fa5d358..e7fa2de35014596e0a471248063f245305717f0f 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -1193,7 +1193,6 @@ static void bch2_dev_free(struct bch_dev *ca) if (ca->kobj.state_in_sysfs) kobject_del(&ca->kobj); - kfree(ca->buckets_nouse); bch2_free_super(&ca->disk_sb); bch2_dev_allocator_background_exit(ca); bch2_dev_journal_exit(ca); diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c index 1c0d1fb20276aa801fd00fac8d75a991ea94612c..f393023a3ae29acf9f3c3f8309336c5f59ec4ccd 100644 --- a/fs/bcachefs/sysfs.c +++ b/fs/bcachefs/sysfs.c @@ -367,7 +367,7 @@ SHOW(bch2_fs) bch2_stripes_heap_to_text(out, c); if (attr == &sysfs_open_buckets) - bch2_open_buckets_to_text(out, c); + bch2_open_buckets_to_text(out, c, NULL); if (attr == &sysfs_open_buckets_partial) bch2_open_buckets_partial_to_text(out, c); @@ -811,6 +811,9 @@ SHOW(bch2_dev) if (attr == &sysfs_alloc_debug) bch2_dev_alloc_debug_to_text(out, ca); + if (attr == &sysfs_open_buckets) + bch2_open_buckets_to_text(out, c, ca); + return 0; } @@ -892,6 +895,7 @@ struct attribute *bch2_dev_files[] = { /* debug: */ &sysfs_alloc_debug, + &sysfs_open_buckets, NULL }; diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index dc48b52b01b49c4ed7af877921dd7e2b446d75a8..dfad1d06633ddb5d5487c28009c2a276f95b5d18 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -4,6 +4,7 @@ #include "buckets.h" #include "btree_cache.h" #include "btree_iter.h" +#include "btree_key_cache.h" #include "btree_locking.h" #include "btree_update_interior.h" #include "keylist.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index d0e6b9deb6cb4d9433f4556042b83ad8bc956663..c62f00322d1ed6778106f215cbe783aaaf5c36c7 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -988,10 +988,33 @@ TRACE_EVENT(trans_restart_split_race, __entry->u64s_remaining) ); -DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, +TRACE_EVENT(trans_blocked_journal_reclaim, TP_PROTO(struct btree_trans *trans, unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + TP_ARGS(trans, caller_ip), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + + __field(unsigned long, key_cache_nr_keys ) + __field(unsigned long, key_cache_nr_dirty ) + __field(long, must_wait ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->key_cache_nr_keys = atomic_long_read(&trans->c->btree_key_cache.nr_keys); + __entry->key_cache_nr_dirty = atomic_long_read(&trans->c->btree_key_cache.nr_dirty); + __entry->must_wait = __bch2_btree_key_cache_must_wait(trans->c); + ), + + TP_printk("%s %pS key cache keys %lu dirty %lu must_wait %li", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->key_cache_nr_keys, + __entry->key_cache_nr_dirty, + __entry->must_wait) ); TRACE_EVENT(trans_restart_journal_preres_get, diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c index 138320eaa2ad3965a971890b0eab035fe860a83e..1b8554460af47e0fac887c575fda2b0278df0038 100644 --- a/fs/bcachefs/util.c +++ b/fs/bcachefs/util.c @@ -416,7 +416,6 @@ void bch2_time_stats_to_text(struct printbuf *out, struct bch2_time_stats *stats printbuf_tabstop_push(out, TABSTOP_SIZE + 2); prt_printf(out, "\tsince mount\r\trecent\r\n"); - prt_printf(out, "recent"); printbuf_tabstops_reset(out); printbuf_tabstop_push(out, out->indent + 20); diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c index c11bf6dacc2c767006e97fa5ac00b29b192e0ecd..331f944d73dc930c0aae3cf0134aba6550fb633f 100644 --- a/fs/bcachefs/xattr.c +++ b/fs/bcachefs/xattr.c @@ -70,17 +70,16 @@ const struct bch_hash_desc bch2_xattr_hash_desc = { .cmp_bkey = xattr_cmp_bkey, }; -int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, - enum bch_validate_flags flags, - struct printbuf *err) +int bch2_xattr_validate(struct bch_fs *c, struct bkey_s_c k, + enum bch_validate_flags flags) { struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k); unsigned val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len)); int ret = 0; - bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, c, err, - xattr_val_size_too_small, + bkey_fsck_err_on(bkey_val_u64s(k.k) < val_u64s, + c, xattr_val_size_too_small, "value too small (%zu < %u)", bkey_val_u64s(k.k), val_u64s); @@ -88,17 +87,17 @@ int bch2_xattr_invalid(struct bch_fs *c, struct bkey_s_c k, val_u64s = xattr_val_u64s(xattr.v->x_name_len, le16_to_cpu(xattr.v->x_val_len) + 4); - bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, c, err, - xattr_val_size_too_big, + bkey_fsck_err_on(bkey_val_u64s(k.k) > val_u64s, + c, xattr_val_size_too_big, "value too big (%zu > %u)", bkey_val_u64s(k.k), val_u64s); - bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), c, err, - xattr_invalid_type, + bkey_fsck_err_on(!bch2_xattr_type_to_handler(xattr.v->x_type), + c, xattr_invalid_type, "invalid type (%u)", xattr.v->x_type); - bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), c, err, - xattr_name_invalid_chars, + bkey_fsck_err_on(memchr(xattr.v->x_name, '\0', xattr.v->x_name_len), + c, xattr_name_invalid_chars, "xattr name has invalid characters"); fsck_err: return ret; @@ -613,10 +612,20 @@ static int bch2_xattr_bcachefs_get_effective( name, buffer, size, true); } +/* Noop - xattrs in the bcachefs_effective namespace are inherited */ +static int bch2_xattr_bcachefs_set_effective(const struct xattr_handler *handler, + struct mnt_idmap *idmap, + struct dentry *dentry, struct inode *vinode, + const char *name, const void *value, + size_t size, int flags) +{ + return 0; +} + static const struct xattr_handler bch_xattr_bcachefs_effective_handler = { .prefix = "bcachefs_effective.", .get = bch2_xattr_bcachefs_get_effective, - .set = bch2_xattr_bcachefs_set, + .set = bch2_xattr_bcachefs_set_effective, }; #endif /* NO_BCACHEFS_FS */ diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h index 1574b9eb4c850566039596aac97a7ba600609a3f..c188a5ad64cef6b69dd20f0c9794c42bae24d71b 100644 --- a/fs/bcachefs/xattr.h +++ b/fs/bcachefs/xattr.h @@ -6,12 +6,11 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc; -int bch2_xattr_invalid(struct bch_fs *, struct bkey_s_c, - enum bch_validate_flags, struct printbuf *); +int bch2_xattr_validate(struct bch_fs *, struct bkey_s_c, enum bch_validate_flags); void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); #define bch2_bkey_ops_xattr ((struct bkey_ops) { \ - .key_invalid = bch2_xattr_invalid, \ + .key_validate = bch2_xattr_validate, \ .val_to_text = bch2_xattr_to_text, \ .min_val_size = 8, \ }) diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index c26545d71d39a3a548925c1fa65da7c356a3d81c..cd6d5bbb4b9df5e0d9d95ad634ae599f6c171b00 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -72,8 +72,10 @@ #ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET #define DATA_START_OFFSET_WORDS (0) +#define MAX_SHARED_LIBS_UPDATE (0) #else #define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS) +#define MAX_SHARED_LIBS_UPDATE (MAX_SHARED_LIBS) #endif struct lib_info { @@ -880,7 +882,7 @@ static int load_flat_binary(struct linux_binprm *bprm) return res; /* Update data segment pointers for all libraries */ - for (i = 0; i < MAX_SHARED_LIBS; i++) { + for (i = 0; i < MAX_SHARED_LIBS_UPDATE; i++) { if (!libinfo.lib_list[i].loaded) continue; for (j = 0; j < MAX_SHARED_LIBS; j++) { diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 498442d0c2163dd2570db2b64c9175b4daf61c0e..2e49d978f504edd1d1d8e54b2c995d885dac23ef 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1223,8 +1223,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans, block_group->space_info->total_bytes -= block_group->length; block_group->space_info->bytes_readonly -= (block_group->length - block_group->zone_unusable); - block_group->space_info->bytes_zone_unusable -= - block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info, + -block_group->zone_unusable); block_group->space_info->disk_total -= block_group->length * factor; spin_unlock(&block_group->space_info->lock); @@ -1396,7 +1396,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes to readonly */ sinfo->bytes_readonly += cache->zone_unusable; - sinfo->bytes_zone_unusable -= cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + -cache->zone_unusable); cache->zone_unusable = 0; } cache->ro++; @@ -3056,9 +3057,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache) if (btrfs_is_zoned(cache->fs_info)) { /* Migrate zone_unusable bytes back */ cache->zone_unusable = - (cache->alloc_offset - cache->used) + + (cache->alloc_offset - cache->used - cache->pinned - + cache->reserved) + (cache->length - cache->zone_capacity); - sinfo->bytes_zone_unusable += cache->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo, + cache->zone_unusable); sinfo->bytes_readonly -= cache->zone_unusable; } num_bytes = cache->length - cache->reserved - diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index c8568b1a61c4322317d7e8a66a641a1fdcb6da2d..75fa563e4cacb8b3df391fda8015140748e67ef5 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -459,6 +459,7 @@ struct btrfs_file_private { void *filldir_buf; u64 last_index; struct extent_state *llseek_cached_state; + bool fsync_skip_inode_lock; }; static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 2ac9296edccb7c9bd56f7f8d65953374dc80dc26..06a9e0542d708b92f9bb790715bfa384e2aa4f7f 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -1134,6 +1134,73 @@ btrfs_find_delayed_ref_head(struct btrfs_delayed_ref_root *delayed_refs, u64 byt return find_ref_head(delayed_refs, bytenr, false); } +static int find_comp(struct btrfs_delayed_ref_node *entry, u64 root, u64 parent) +{ + int type = parent ? BTRFS_SHARED_BLOCK_REF_KEY : BTRFS_TREE_BLOCK_REF_KEY; + + if (type < entry->type) + return -1; + if (type > entry->type) + return 1; + + if (type == BTRFS_TREE_BLOCK_REF_KEY) { + if (root < entry->ref_root) + return -1; + if (root > entry->ref_root) + return 1; + } else { + if (parent < entry->parent) + return -1; + if (parent > entry->parent) + return 1; + } + return 0; +} + +/* + * Check to see if a given root/parent reference is attached to the head. This + * only checks for BTRFS_ADD_DELAYED_REF references that match, as that + * indicates the reference exists for the given root or parent. This is for + * tree blocks only. + * + * @head: the head of the bytenr we're searching. + * @root: the root objectid of the reference if it is a normal reference. + * @parent: the parent if this is a shared backref. + */ +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent) +{ + struct rb_node *node; + bool found = false; + + lockdep_assert_held(&head->mutex); + + spin_lock(&head->lock); + node = head->ref_tree.rb_root.rb_node; + while (node) { + struct btrfs_delayed_ref_node *entry; + int ret; + + entry = rb_entry(node, struct btrfs_delayed_ref_node, ref_node); + ret = find_comp(entry, root, parent); + if (ret < 0) { + node = node->rb_left; + } else if (ret > 0) { + node = node->rb_right; + } else { + /* + * We only want to count ADD actions, as drops mean the + * ref doesn't exist. + */ + if (entry->action == BTRFS_ADD_DELAYED_REF) + found = true; + break; + } + } + spin_unlock(&head->lock); + return found; +} + void __cold btrfs_delayed_ref_exit(void) { kmem_cache_destroy(btrfs_delayed_ref_head_cachep); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index ef15e998be03509b4081cac1626c1549468f2402..05f634eb472d84d419e1f8409e765544a0021f95 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -389,6 +389,8 @@ void btrfs_dec_delayed_refs_rsv_bg_updates(struct btrfs_fs_info *fs_info); int btrfs_delayed_refs_rsv_refill(struct btrfs_fs_info *fs_info, enum btrfs_reserve_flush_enum flush); bool btrfs_check_space_for_delayed_refs(struct btrfs_fs_info *fs_info); +bool btrfs_find_delayed_tree_ref(struct btrfs_delayed_ref_head *head, + u64 root, u64 parent); static inline u64 btrfs_delayed_ref_owner(struct btrfs_delayed_ref_node *node) { diff --git a/fs/btrfs/direct-io.c b/fs/btrfs/direct-io.c index f9fb2db6a1e4ad29125ae78d09cfa67f9890e2bb..67adbe9d294aecabce353c00cb5cb06bae7b7cba 100644 --- a/fs/btrfs/direct-io.c +++ b/fs/btrfs/direct-io.c @@ -856,21 +856,37 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ +again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; - /* - * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync - * iocb, and that needs to lock the inode. So unlock it before calling - * iomap_dio_complete() to avoid a deadlock. - */ - btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); - - if (IS_ERR_OR_NULL(dio)) + if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); - else + } else { + struct btrfs_file_private stack_private = { 0 }; + struct btrfs_file_private *private; + const bool have_private = (file->private_data != NULL); + + if (!have_private) + file->private_data = &stack_private; + + /* + * If we have a synchronous write, we must make sure the fsync + * triggered by the iomap_dio_complete() call below doesn't + * deadlock on the inode lock - we are already holding it and we + * can't call it after unlocking because we may need to complete + * partial writes due to the input buffer (or parts of it) not + * being already faulted in. + */ + private = file->private_data; + private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); + private->fsync_skip_inode_lock = false; + + if (!have_private) + file->private_data = NULL; + } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) @@ -897,10 +913,12 @@ relock: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto relock; + goto again; } } + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index d77498e7671c724e7aae5907c41453dab04aa6a0..feec49e6f9c80115d3e5f180e5ff8a12024f60dd 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2793,7 +2793,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info, readonly = true; } else if (btrfs_is_zoned(fs_info)) { /* Need reset before reusing in a zoned block group */ - space_info->bytes_zone_unusable += len; + btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info, + len); readonly = true; } spin_unlock(&cache->lock); @@ -5471,23 +5472,62 @@ static int check_ref_exists(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u64 parent, int level) { + struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_head *head; struct btrfs_path *path; struct btrfs_extent_inline_ref *iref; int ret; + bool exists = false; path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: ret = lookup_extent_backref(trans, path, &iref, bytenr, root->fs_info->nodesize, parent, btrfs_root_id(root), level, 0); + if (ret != -ENOENT) { + /* + * If we get 0 then we found our reference, return 1, else + * return the error if it's not -ENOENT; + */ + btrfs_free_path(path); + return (ret < 0 ) ? ret : 1; + } + + /* + * We could have a delayed ref with this reference, so look it up while + * we're holding the path open to make sure we don't race with the + * delayed ref running. + */ + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); + if (!head) + goto out; + if (!mutex_trylock(&head->mutex)) { + /* + * We're contended, means that the delayed ref is running, get a + * reference and wait for the ref head to be complete and then + * try again. + */ + refcount_inc(&head->refs); + spin_unlock(&delayed_refs->lock); + + btrfs_release_path(path); + + mutex_lock(&head->mutex); + mutex_unlock(&head->mutex); + btrfs_put_delayed_ref_head(head); + goto again; + } + + exists = btrfs_find_delayed_tree_ref(head, root->root_key.objectid, parent); + mutex_unlock(&head->mutex); +out: + spin_unlock(&delayed_refs->lock); btrfs_free_path(path); - if (ret == -ENOENT) - return 0; - if (ret < 0) - return ret; - return 1; + return exists ? 1 : 0; } /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aa7f8148cd0d06ff0707308e7f49632e3306eac1..c73cd4f89015f692dc2c81e90b22c78926b2daeb 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1496,6 +1496,13 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, free_extent_map(em); em = NULL; + /* + * Although the PageDirty bit might be cleared before entering + * this function, subpage dirty bit is not cleared. + * So clear subpage dirty bit here so next time we won't submit + * page for range already written to disk. + */ + btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); btrfs_set_range_writeback(inode, cur, cur + iosize - 1); if (!PageWriteback(page)) { btrfs_err(inode->root->fs_info, @@ -1503,13 +1510,6 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode, page->index, cur, end); } - /* - * Although the PageDirty bit is cleared before entering this - * function, subpage dirty bit is not cleared. - * So clear subpage dirty bit here so next time we won't submit - * page for range already written to disk. - */ - btrfs_folio_clear_dirty(fs_info, page_folio(page), cur, iosize); submit_extent_page(bio_ctrl, disk_bytenr, page, iosize, cur - page_offset(page)); diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 81558f90ee80fa5c05d5811ab5e79017e6bdb0ec..10ac5f657e388981a40ccc5dad7d4420570b9091 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c @@ -664,7 +664,7 @@ static noinline int merge_extent_mapping(struct btrfs_inode *inode, start_diff = start - em->start; em->start = start; em->len = end - start; - if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE && !extent_map_is_compressed(em)) + if (em->disk_bytenr < EXTENT_MAP_LAST_BYTE) em->offset += start_diff; return add_extent_mapping(inode, em, 0); } @@ -1147,8 +1147,7 @@ static long btrfs_scan_inode(struct btrfs_inode *inode, struct btrfs_em_shrink_c return 0; /* - * We want to be fast because we can be called from any path trying to - * allocate memory, so if the lock is busy we don't want to spend time + * We want to be fast so if the lock is busy we don't want to spend time * waiting for it - either some task is about to do IO for the inode or * we may have another task shrinking extent maps, here in this code, so * skip this inode. @@ -1191,9 +1190,7 @@ next: /* * Stop if we need to reschedule or there's contention on the * lock. This is to avoid slowing other tasks trying to take the - * lock and because the shrinker might be called during a memory - * allocation path and we want to avoid taking a very long time - * and slowing down all sorts of tasks. + * lock. */ if (need_resched() || rwlock_needbreak(&tree->lock)) break; @@ -1222,12 +1219,7 @@ static long btrfs_scan_root(struct btrfs_root *root, struct btrfs_em_shrink_ctx if (ctx->scanned >= ctx->nr_to_scan) break; - /* - * We may be called from memory allocation paths, so we don't - * want to take too much time and slowdown tasks. - */ - if (need_resched()) - break; + cond_resched(); inode = btrfs_find_first_inode(root, min_ino); } @@ -1285,14 +1277,12 @@ long btrfs_free_extent_maps(struct btrfs_fs_info *fs_info, long nr_to_scan) ctx.last_ino); } - /* - * We may be called from memory allocation paths, so we don't want to - * take too much time and slowdown tasks, so stop if we need reschedule. - */ - while (ctx.scanned < ctx.nr_to_scan && !need_resched()) { + while (ctx.scanned < ctx.nr_to_scan) { struct btrfs_root *root; unsigned long count; + cond_resched(); + spin_lock(&fs_info->fs_roots_radix_lock); count = radix_tree_gang_lookup(&fs_info->fs_roots_radix, (void **)&root, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 21381de906f6073e32a9f360e456da900dbe4d47..9914419f3b7d6105cd16ac013b62d0c831fce4e8 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1603,6 +1603,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct btrfs_inode *inode = BTRFS_I(d_inode(dentry)); struct btrfs_root *root = inode->root; @@ -1612,6 +1613,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; + const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); trace_btrfs_sync_file(file, datasync); @@ -1639,7 +1641,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; - btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + down_write(&inode->i_mmap_lock); + else + btrfs_inode_lock(inode, BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); @@ -1663,7 +1668,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ ret = start_ordered_ops(inode, start, end); if (ret) { - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } @@ -1788,7 +1796,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); if (ret == BTRFS_NO_LOG_SYNC) { ret = btrfs_end_transaction(trans); @@ -1857,7 +1868,10 @@ out: out_release_extents: btrfs_release_log_ctx_extents(&ctx); - btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&inode->i_mmap_lock); + else + btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP); goto out; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3f9b7507543ad4b0cd7cb7e6e4e16bc1dacf6f22..eaa1dbd313528cf33ea44f7c2bb5cde0701d70ca 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -2697,15 +2697,16 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, u64 offset = bytenr - block_group->start; u64 to_free, to_unusable; int bg_reclaim_threshold = 0; - bool initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + bool initial; u64 reclaimable_unusable; - WARN_ON(!initial && offset + size > block_group->zone_capacity); + spin_lock(&block_group->lock); + initial = ((size == block_group->length) && (block_group->alloc_offset == 0)); + WARN_ON(!initial && offset + size > block_group->zone_capacity); if (!initial) bg_reclaim_threshold = READ_ONCE(sinfo->bg_reclaim_threshold); - spin_lock(&ctl->tree_lock); if (!used) to_free = size; else if (initial) @@ -2718,18 +2719,19 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, to_free = offset + size - block_group->alloc_offset; to_unusable = size - to_free; + spin_lock(&ctl->tree_lock); ctl->free_space += to_free; + spin_unlock(&ctl->tree_lock); /* * If the block group is read-only, we should account freed space into * bytes_readonly. */ - if (!block_group->ro) + if (!block_group->ro) { block_group->zone_unusable += to_unusable; - spin_unlock(&ctl->tree_lock); + WARN_ON(block_group->zone_unusable > block_group->length); + } if (!used) { - spin_lock(&block_group->lock); block_group->alloc_offset -= size; - spin_unlock(&block_group->lock); } reclaimable_unusable = block_group->zone_unusable - @@ -2743,6 +2745,8 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group, btrfs_mark_bg_to_reclaim(block_group); } + spin_unlock(&block_group->lock); + return 0; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 01eab695564727be83101c4b9eecd1a43c1fc908..b1b6564ab68f0ca7ef9ff4d729c07203d3dc00e9 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -714,8 +714,9 @@ out: return ret; } -static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, - u64 end, +static noinline int cow_file_range_inline(struct btrfs_inode *inode, + struct page *locked_page, + u64 offset, u64 end, size_t compressed_size, int compress_type, struct folio *compressed_folio, @@ -739,7 +740,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset, return ret; } - extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached, + if (ret == 0) + locked_page = NULL; + + extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached, clear_flags, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); @@ -1043,10 +1047,10 @@ again: * extent for the subpage case. */ if (total_in < actual_end) - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, NULL, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); else - ret = cow_file_range_inline(inode, start, end, total_compressed, + ret = cow_file_range_inline(inode, NULL, start, end, total_compressed, compress_type, folios[0], false); if (ret <= 0) { if (ret < 0) @@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode, if (!no_inline) { /* lets try to make an inline extent */ - ret = cow_file_range_inline(inode, start, end, 0, + ret = cow_file_range_inline(inode, locked_page, start, end, 0, BTRFS_COMPRESS_NONE, NULL, false); if (ret <= 0) { /* @@ -1581,6 +1585,7 @@ out_unlock: locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); start += cur_alloc_size; } @@ -1594,6 +1599,7 @@ out_unlock: clear_bits |= EXTENT_CLEAR_DATA_RESV; extent_clear_unlock_delalloc(inode, start, end, locked_page, &cached, clear_bits, page_ops); + btrfs_qgroup_free_data(inode, NULL, start, cur_alloc_size, NULL); } return ret; } @@ -2255,6 +2261,7 @@ error: EXTENT_DO_ACCOUNTING, PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK); + btrfs_qgroup_free_data(inode, NULL, cur_offset, end - cur_offset + 1, NULL); } btrfs_free_path(path); return ret; @@ -4188,6 +4195,7 @@ err: btrfs_i_size_write(dir, dir->vfs_inode.i_size - name->len * 2); inode_inc_iversion(&inode->vfs_inode); + inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); ret = btrfs_update_inode(trans, dir); @@ -5660,7 +5668,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) struct inode *inode; struct btrfs_root *root = BTRFS_I(dir)->root; struct btrfs_root *sub_root = root; - struct btrfs_key location; + struct btrfs_key location = { 0 }; u8 di_type = 0; int ret = 0; @@ -7198,6 +7206,12 @@ static void wait_subpage_spinlock(struct page *page) spin_unlock_irq(&subpage->lock); } +static int btrfs_launder_folio(struct folio *folio) +{ + return btrfs_qgroup_free_data(folio_to_inode(folio), NULL, folio_pos(folio), + PAGE_SIZE, NULL); +} + static bool __btrfs_release_folio(struct folio *folio, gfp_t gfp_flags) { if (try_release_extent_mapping(&folio->page, gfp_flags)) { @@ -10133,6 +10147,7 @@ static const struct address_space_operations btrfs_aops = { .writepages = btrfs_writepages, .readahead = btrfs_readahead, .invalidate_folio = btrfs_invalidate_folio, + .launder_folio = btrfs_launder_folio, .release_folio = btrfs_release_folio, .migrate_folio = btrfs_migrate_folio, .dirty_folio = filemap_dirty_folio, diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 32dcea662da38e6a6d404216edcae712d42132c9..fc821aa446f02f31a099635307959ecff22f72c5 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -14,7 +14,7 @@ struct root_name_map { u64 id; - char name[16]; + const char *name; }; static const struct root_name_map root_map[] = { diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 14a8d710001838501623672997ee07b1e956f298..0de9162ff48138b4c3d7d96130f33ce808a3c841 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1648,14 +1648,20 @@ static void scrub_reset_stripe(struct scrub_stripe *stripe) } } +static u32 stripe_length(const struct scrub_stripe *stripe) +{ + ASSERT(stripe->bg); + + return min(BTRFS_STRIPE_LEN, + stripe->bg->start + stripe->bg->length - stripe->logical); +} + static void scrub_submit_extent_sector_read(struct scrub_ctx *sctx, struct scrub_stripe *stripe) { struct btrfs_fs_info *fs_info = stripe->bg->fs_info; struct btrfs_bio *bbio = NULL; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; u64 stripe_len = BTRFS_STRIPE_LEN; int mirror = stripe->mirror_num; int i; @@ -1729,9 +1735,7 @@ static void scrub_submit_initial_read(struct scrub_ctx *sctx, { struct btrfs_fs_info *fs_info = sctx->fs_info; struct btrfs_bio *bbio; - unsigned int nr_sectors = min(BTRFS_STRIPE_LEN, stripe->bg->start + - stripe->bg->length - stripe->logical) >> - fs_info->sectorsize_bits; + unsigned int nr_sectors = stripe_length(stripe) >> fs_info->sectorsize_bits; int mirror = stripe->mirror_num; ASSERT(stripe->bg); @@ -1871,6 +1875,9 @@ static int flush_scrub_stripes(struct scrub_ctx *sctx) stripe = &sctx->stripes[i]; wait_scrub_stripe_io(stripe); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = stripe->physical + stripe_length(stripe); + spin_unlock(&sctx->stat_lock); scrub_reset_stripe(stripe); } out: @@ -2139,7 +2146,9 @@ static int scrub_simple_mirror(struct scrub_ctx *sctx, cur_physical, &found_logical); if (ret > 0) { /* No more extent, just update the accounting */ + spin_lock(&sctx->stat_lock); sctx->stat.last_physical = physical + logical_length; + spin_unlock(&sctx->stat_lock); ret = 0; break; } @@ -2336,6 +2345,10 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, stripe_logical += chunk_logical; ret = scrub_raid56_parity_stripe(sctx, scrub_dev, bg, map, stripe_logical); + spin_lock(&sctx->stat_lock); + sctx->stat.last_physical = min(physical + BTRFS_STRIPE_LEN, + physical_end); + spin_unlock(&sctx->stat_lock); if (ret) goto out; goto next; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 4ca711a773efa9f3823d36f5096503761f2f01fc..619fa0b8b3f6f9818e72971ea7e1d42b4b298310 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -347,7 +347,7 @@ struct name_cache_entry { int ret; int need_later_update; int name_len; - char name[]; + char name[] __counted_by(name_len); }; /* See the comment at lru_cache.h about struct btrfs_lru_cache_entry. */ @@ -6157,25 +6157,51 @@ static int send_write_or_clone(struct send_ctx *sctx, u64 offset = key->offset; u64 end; u64 bs = sctx->send_root->fs_info->sectorsize; + struct btrfs_file_extent_item *ei; + u64 disk_byte; + u64 data_offset; + u64 num_bytes; + struct btrfs_inode_info info = { 0 }; end = min_t(u64, btrfs_file_extent_end(path), sctx->cur_inode_size); if (offset >= end) return 0; - if (clone_root && IS_ALIGNED(end, bs)) { - struct btrfs_file_extent_item *ei; - u64 disk_byte; - u64 data_offset; + num_bytes = end - offset; - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); - data_offset = btrfs_file_extent_offset(path->nodes[0], ei); - ret = clone_range(sctx, path, clone_root, disk_byte, - data_offset, offset, end - offset); - } else { - ret = send_extent_data(sctx, path, offset, end - offset); - } + if (!clone_root) + goto write_data; + + if (IS_ALIGNED(end, bs)) + goto clone_data; + + /* + * If the extent end is not aligned, we can clone if the extent ends at + * the i_size of the inode and the clone range ends at the i_size of the + * source inode, otherwise the clone operation fails with -EINVAL. + */ + if (end != sctx->cur_inode_size) + goto write_data; + + ret = get_inode_info(clone_root->root, clone_root->ino, &info); + if (ret < 0) + return ret; + + if (clone_root->offset + num_bytes == info.size) + goto clone_data; + +write_data: + ret = send_extent_data(sctx, path, offset, num_bytes); + sctx->cur_inode_next_write_offset = end; + return ret; + +clone_data: + ei = btrfs_item_ptr(path->nodes[0], path->slots[0], + struct btrfs_file_extent_item); + disk_byte = btrfs_file_extent_disk_bytenr(path->nodes[0], ei); + data_offset = btrfs_file_extent_offset(path->nodes[0], ei); + ret = clone_range(sctx, path, clone_root, disk_byte, data_offset, offset, + num_bytes); sctx->cur_inode_next_write_offset = end; return ret; } diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 9ac94d3119e8ddd5fea47652a6114955c2ec9142..68e14fd486384e3d47236f4c40d28501fcc8ccd6 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -316,7 +316,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, found->bytes_used += block_group->used; found->disk_used += block_group->used * factor; found->bytes_readonly += block_group->bytes_super; - found->bytes_zone_unusable += block_group->zone_unusable; + btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable); if (block_group->length > 0) found->full = 0; btrfs_try_granting_tickets(info, found); @@ -583,8 +583,7 @@ again: spin_lock(&cache->lock); avail = cache->length - cache->used - cache->pinned - - cache->reserved - cache->delalloc_bytes - - cache->bytes_super - cache->zone_unusable; + cache->reserved - cache->bytes_super - cache->zone_unusable; btrfs_info(fs_info, "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s", cache->start, cache->length, cache->used, cache->pinned, diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h index 4db8a0267c1606fe7b671ff349962ef18eb4317b..88b44221ce9782304fc4c7ead17c49fd593e451b 100644 --- a/fs/btrfs/space-info.h +++ b/fs/btrfs/space-info.h @@ -249,6 +249,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \ DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); +DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); int btrfs_init_space_info(struct btrfs_fs_info *fs_info); void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 08d33cb372fbcb55d2e33388314cd1e78961b6eb..98fa0f382480a2a51420d586b1e2a2fa6c58d025 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "messages.h" #include "delayed-inode.h" #include "ctree.h" @@ -683,8 +684,11 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, ret = false; if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { - if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) + if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { btrfs_info(info, "disk space caching is enabled"); + btrfs_warn(info, +"space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); + } if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) btrfs_info(info, "using free-space-tree"); } @@ -2398,7 +2402,13 @@ static long btrfs_nr_cached_objects(struct super_block *sb, struct shrink_contro trace_btrfs_extent_map_shrinker_count(fs_info, nr); - return nr; + /* + * Only report the real number for DEBUG builds, as there are reports of + * serious performance degradation caused by too frequent shrinks. + */ + if (IS_ENABLED(CONFIG_BTRFS_DEBUG)) + return nr; + return 0; } static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_control *sc) @@ -2406,6 +2416,15 @@ static long btrfs_free_cached_objects(struct super_block *sb, struct shrink_cont const long nr_to_scan = min_t(unsigned long, LONG_MAX, sc->nr_to_scan); struct btrfs_fs_info *fs_info = btrfs_sb(sb); + /* + * We may be called from any task trying to allocate memory and we don't + * want to slow it down with scanning and dropping extent maps. It would + * also cause heavy lock contention if many tasks concurrently enter + * here. Therefore only allow kswapd tasks to scan and drop extent maps. + */ + if (!current_is_kswapd()) + return 0; + return btrfs_free_extent_maps(fs_info, nr_to_scan); } diff --git a/fs/btrfs/tests/extent-map-tests.c b/fs/btrfs/tests/extent-map-tests.c index ebec4ab361b82ac58e57414a3ec323034202d4f4..56e61ac1cc64c87d4dde3e6ddf92a4daaef650d3 100644 --- a/fs/btrfs/tests/extent-map-tests.c +++ b/fs/btrfs/tests/extent-map-tests.c @@ -900,6 +900,102 @@ out: return ret; } +/* + * Test a regression for compressed extent map adjustment when we attempt to + * add an extent map that is partially overlapped by another existing extent + * map. The resulting extent map offset was left unchanged despite having + * incremented its start offset. + */ +static int test_case_8(struct btrfs_fs_info *fs_info, struct btrfs_inode *inode) +{ + struct extent_map_tree *em_tree = &inode->extent_tree; + struct extent_map *em; + int ret; + int ret2; + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + return -ENOMEM; + } + + /* Compressed extent for the file range [120K, 128K). */ + em->start = SZ_1K * 120; + em->len = SZ_8K; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_8K; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, em->start, em->len); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [120K, 128K)"); + goto out; + } + + em = alloc_extent_map(); + if (!em) { + test_std_err(TEST_ALLOC_EXTENT_MAP); + ret = -ENOMEM; + goto out; + } + + /* + * Compressed extent for the file range [108K, 144K), which overlaps + * with the [120K, 128K) we previously inserted. + */ + em->start = SZ_1K * 108; + em->len = SZ_1K * 36; + em->disk_num_bytes = SZ_4K; + em->ram_bytes = SZ_1K * 36; + em->flags |= EXTENT_FLAG_COMPRESS_ZLIB; + + /* + * Try to add the extent map but with a search range of [140K, 144K), + * this should succeed and adjust the extent map to the range + * [128K, 144K), with a length of 16K and an offset of 20K. + * + * This simulates a scenario where in the subvolume tree of an inode we + * have a compressed file extent item for the range [108K, 144K) and we + * have an overlapping compressed extent map for the range [120K, 128K), + * which was created by an encoded write, but its ordered extent was not + * yet completed, so the subvolume tree doesn't have yet the file extent + * item for that range - we only have the extent map in the inode's + * extent map tree. + */ + write_lock(&em_tree->lock); + ret = btrfs_add_extent_mapping(inode, &em, SZ_1K * 140, SZ_4K); + write_unlock(&em_tree->lock); + free_extent_map(em); + if (ret < 0) { + test_err("couldn't add extent map for range [108K, 144K)"); + goto out; + } + + if (em->start != SZ_128K) { + test_err("unexpected extent map start %llu (should be 128K)", em->start); + ret = -EINVAL; + goto out; + } + if (em->len != SZ_16K) { + test_err("unexpected extent map length %llu (should be 16K)", em->len); + ret = -EINVAL; + goto out; + } + if (em->offset != SZ_1K * 20) { + test_err("unexpected extent map offset %llu (should be 20K)", em->offset); + ret = -EINVAL; + goto out; + } +out: + ret2 = free_extent_map_tree(inode); + if (ret == 0) + ret = ret2; + + return ret; +} + struct rmap_test_vector { u64 raid_type; u64 physical_start; @@ -1076,6 +1172,9 @@ int btrfs_test_extent_map(void) if (ret) goto out; ret = test_case_7(fs_info, BTRFS_I(inode)); + if (ret) + goto out; + ret = test_case_8(fs_info, BTRFS_I(inode)); if (ret) goto out; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6388786fd8b5d315be4a5f94a0172c79421fdd52..634d69964fe4c1aa8caa6d71ab06ada779cf226d 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -569,9 +569,10 @@ static int check_dir_item(struct extent_buffer *leaf, /* dir type check */ dir_type = btrfs_dir_ftype(leaf, di); - if (unlikely(dir_type >= BTRFS_FT_MAX)) { + if (unlikely(dir_type <= BTRFS_FT_UNKNOWN || + dir_type >= BTRFS_FT_MAX)) { dir_item_err(leaf, slot, - "invalid dir item type, have %u expect [0, %u)", + "invalid dir item type, have %u expect (0, %u)", dir_type, BTRFS_FT_MAX); return -EUCLEAN; } @@ -634,7 +635,7 @@ static int check_dir_item(struct extent_buffer *leaf, */ if (key->type == BTRFS_DIR_ITEM_KEY || key->type == BTRFS_XATTR_ITEM_KEY) { - char namebuf[max(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; + char namebuf[MAX(BTRFS_NAME_LEN, XATTR_NAME_MAX)]; read_extent_buffer(leaf, namebuf, (unsigned long)(di + 1), name_len); @@ -1289,6 +1290,19 @@ static void extent_err(const struct extent_buffer *eb, int slot, va_end(args); } +static bool is_valid_dref_root(u64 rootid) +{ + /* + * The following tree root objectids are allowed to have a data backref: + * - subvolume trees + * - data reloc tree + * - tree root + * For v1 space cache + */ + return is_fstree(rootid) || rootid == BTRFS_DATA_RELOC_TREE_OBJECTID || + rootid == BTRFS_ROOT_TREE_OBJECTID; +} + static int check_extent_item(struct extent_buffer *leaf, struct btrfs_key *key, int slot, struct btrfs_key *prev_key) @@ -1441,6 +1455,8 @@ static int check_extent_item(struct extent_buffer *leaf, struct btrfs_extent_data_ref *dref; struct btrfs_shared_data_ref *sref; u64 seq; + u64 dref_root; + u64 dref_objectid; u64 dref_offset; u64 inline_offset; u8 inline_type; @@ -1484,11 +1500,26 @@ static int check_extent_item(struct extent_buffer *leaf, */ case BTRFS_EXTENT_DATA_REF_KEY: dref = (struct btrfs_extent_data_ref *)(&iref->offset); + dref_root = btrfs_extent_data_ref_root(leaf, dref); + dref_objectid = btrfs_extent_data_ref_objectid(leaf, dref); dref_offset = btrfs_extent_data_ref_offset(leaf, dref); seq = hash_extent_data_ref( btrfs_extent_data_ref_root(leaf, dref), btrfs_extent_data_ref_objectid(leaf, dref), btrfs_extent_data_ref_offset(leaf, dref)); + if (unlikely(!is_valid_dref_root(dref_root))) { + extent_err(leaf, slot, + "invalid data ref root value %llu", + dref_root); + return -EUCLEAN; + } + if (unlikely(dref_objectid < BTRFS_FIRST_FREE_OBJECTID || + dref_objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid data ref objectid value %llu", + dref_root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(dref_offset, fs_info->sectorsize))) { extent_err(leaf, slot, @@ -1627,6 +1658,8 @@ static int check_extent_data_ref(struct extent_buffer *leaf, return -EUCLEAN; } for (; ptr < end; ptr += sizeof(*dref)) { + u64 root; + u64 objectid; u64 offset; /* @@ -1634,7 +1667,22 @@ static int check_extent_data_ref(struct extent_buffer *leaf, * overflow from the leaf due to hash collisions. */ dref = (struct btrfs_extent_data_ref *)ptr; + root = btrfs_extent_data_ref_root(leaf, dref); + objectid = btrfs_extent_data_ref_objectid(leaf, dref); offset = btrfs_extent_data_ref_offset(leaf, dref); + if (unlikely(!is_valid_dref_root(root))) { + extent_err(leaf, slot, + "invalid extent data backref root value %llu", + root); + return -EUCLEAN; + } + if (unlikely(objectid < BTRFS_FIRST_FREE_OBJECTID || + objectid > BTRFS_LAST_FREE_OBJECTID)) { + extent_err(leaf, slot, + "invalid extent data backref objectid value %llu", + root); + return -EUCLEAN; + } if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) { extent_err(leaf, slot, "invalid extent data backref offset, have %llu expect aligned to %u", @@ -1716,6 +1764,72 @@ static int check_raid_stripe_extent(const struct extent_buffer *leaf, return 0; } +static int check_dev_extent_item(const struct extent_buffer *leaf, + const struct btrfs_key *key, + int slot, + struct btrfs_key *prev_key) +{ + struct btrfs_dev_extent *de; + const u32 sectorsize = leaf->fs_info->sectorsize; + + de = btrfs_item_ptr(leaf, slot, struct btrfs_dev_extent); + /* Basic fixed member checks. */ + if (unlikely(btrfs_dev_extent_chunk_tree(leaf, de) != + BTRFS_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk tree id, has %llu expect %llu", + btrfs_dev_extent_chunk_tree(leaf, de), + BTRFS_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + if (unlikely(btrfs_dev_extent_chunk_objectid(leaf, de) != + BTRFS_FIRST_CHUNK_TREE_OBJECTID)) { + generic_err(leaf, slot, + "invalid dev extent chunk objectid, has %llu expect %llu", + btrfs_dev_extent_chunk_objectid(leaf, de), + BTRFS_FIRST_CHUNK_TREE_OBJECTID); + return -EUCLEAN; + } + /* Alignment check. */ + if (unlikely(!IS_ALIGNED(key->offset, sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent key.offset, has %llu not aligned to %u", + key->offset, sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_chunk_offset(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent chunk offset, has %llu not aligned to %u", + btrfs_dev_extent_chunk_objectid(leaf, de), + sectorsize); + return -EUCLEAN; + } + if (unlikely(!IS_ALIGNED(btrfs_dev_extent_length(leaf, de), + sectorsize))) { + generic_err(leaf, slot, + "invalid dev extent length, has %llu not aligned to %u", + btrfs_dev_extent_length(leaf, de), sectorsize); + return -EUCLEAN; + } + /* Overlap check with previous dev extent. */ + if (slot && prev_key->objectid == key->objectid && + prev_key->type == key->type) { + struct btrfs_dev_extent *prev_de; + u64 prev_len; + + prev_de = btrfs_item_ptr(leaf, slot - 1, struct btrfs_dev_extent); + prev_len = btrfs_dev_extent_length(leaf, prev_de); + if (unlikely(prev_key->offset + prev_len > key->offset)) { + generic_err(leaf, slot, + "dev extent overlap, prev offset %llu len %llu current offset %llu", + prev_key->objectid, prev_len, key->offset); + return -EUCLEAN; + } + } + return 0; +} + /* * Common point to switch the item-specific validation. */ @@ -1752,6 +1866,9 @@ static enum btrfs_tree_block_status check_leaf_item(struct extent_buffer *leaf, case BTRFS_DEV_ITEM_KEY: ret = check_dev_item(leaf, key, slot); break; + case BTRFS_DEV_EXTENT_KEY: + ret = check_dev_extent_item(leaf, key, slot, prev_key); + break; case BTRFS_INODE_ITEM_KEY: ret = check_inode_item(leaf, key, slot); break; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 8c16bc5250ef56cb8d0864d3b4b054466d57f250..c4744a02db753c761491b5bf4ca12977342db114 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -246,7 +246,8 @@ static void finish_netfs_read(struct ceph_osd_request *req) if (err >= 0) { if (sparse && err > 0) err = ceph_sparse_ext_map_end(op); - if (err < subreq->len) + if (err < subreq->len && + subreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); if (IS_ENCRYPTED(inode) && err > 0) { err = ceph_fscrypt_decrypt_extents(inode, @@ -282,7 +283,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq) size_t len; int mode; - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); if (subreq->start >= inode->i_size) @@ -424,6 +426,9 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file) struct ceph_netfs_request_data *priv; int ret = 0; + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); + if (rreq->origin != NETFS_READAHEAD) return 0; @@ -498,6 +503,11 @@ const struct netfs_request_ops ceph_netfs_ops = { }; #ifdef CONFIG_CEPH_FSCACHE +static void ceph_set_page_fscache(struct page *page) +{ + folio_start_private_2(page_folio(page)); /* [DEPRECATED] */ +} + static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) { struct inode *inode = priv; @@ -515,6 +525,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b ceph_fscache_write_terminated, inode, true, caching); } #else +static inline void ceph_set_page_fscache(struct page *page) +{ +} + static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) { } @@ -706,6 +720,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = wlen; set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); ceph_fscache_write_to_cache(inode, page_off, len, caching); if (IS_ENCRYPTED(inode)) { @@ -789,6 +805,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) return AOP_WRITEPAGE_ACTIVATE; } + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ + err = writepage_nounlock(page, wbc); if (err == -ERESTARTSYS) { /* direct memory reclaimer was killed by SIGKILL. return 0 @@ -1062,7 +1080,8 @@ get_more_pages: unlock_page(page); break; } - if (PageWriteback(page)) { + if (PageWriteback(page) || + PagePrivate2(page) /* [DEPRECATED] */) { if (wbc->sync_mode == WB_SYNC_NONE) { doutc(cl, "%p under writeback\n", page); unlock_page(page); @@ -1070,6 +1089,7 @@ get_more_pages: } doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); + folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */ } if (!clear_page_dirty_for_io(page)) { @@ -1254,6 +1274,8 @@ new_request: } set_page_writeback(page); + if (caching) + ceph_set_page_fscache(page); len += thp_size(page); } ceph_fscache_write_to_cache(inode, offset, len, caching); diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index e98aa8219303616d6e4526541b474f30bf908c43..808c9c04827683ab069e86c617b7b90441e46552 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2016,6 +2016,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci) * CHECK_CAPS_AUTHONLY - we should only check the auth cap * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without * further delay. + * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without + * further delay. */ void ceph_check_caps(struct ceph_inode_info *ci, int flags) { @@ -2097,7 +2099,7 @@ retry: } doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s " - "flushing %s issued %s revoking %s retain %s %s%s%s\n", + "flushing %s issued %s revoking %s retain %s %s%s%s%s\n", inode, ceph_vinop(inode), ceph_cap_string(file_wanted), ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps), ceph_cap_string(ci->i_flushing_caps), @@ -2105,7 +2107,8 @@ retry: ceph_cap_string(retain), (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "", (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "", - (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : ""); + (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "", + (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : ""); /* * If we no longer need to hold onto old our caps, and we may @@ -2180,6 +2183,11 @@ retry: queue_writeback = true; } + if (flags & CHECK_CAPS_FLUSH_FORCE) { + doutc(cl, "force to flush caps\n"); + goto ack; + } + if (cap == ci->i_auth_cap && (cap->issued & CEPH_CAP_FILE_WR)) { /* request larger max_size from MDS? */ @@ -3510,6 +3518,8 @@ static void handle_cap_grant(struct inode *inode, bool queue_invalidate = false; bool deleted_inode = false; bool fill_inline = false; + bool revoke_wait = false; + int flags = 0; /* * If there is at least one crypto block then we'll trust @@ -3705,16 +3715,18 @@ static void handle_cap_grant(struct inode *inode, ceph_cap_string(cap->issued), ceph_cap_string(newcaps), ceph_cap_string(revoking)); if (S_ISREG(inode->i_mode) && - (revoking & used & CEPH_CAP_FILE_BUFFER)) + (revoking & used & CEPH_CAP_FILE_BUFFER)) { writeback = true; /* initiate writeback; will delay ack */ - else if (queue_invalidate && + revoke_wait = true; + } else if (queue_invalidate && revoking == CEPH_CAP_FILE_CACHE && - (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) - ; /* do nothing yet, invalidation will be queued */ - else if (cap == ci->i_auth_cap) + (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) { + revoke_wait = true; /* do nothing yet, invalidation will be queued */ + } else if (cap == ci->i_auth_cap) { check_caps = 1; /* check auth cap only */ - else + } else { check_caps = 2; /* check all caps */ + } /* If there is new caps, try to wake up the waiters */ if (~cap->issued & newcaps) wake = true; @@ -3741,8 +3753,9 @@ static void handle_cap_grant(struct inode *inode, BUG_ON(cap->issued & ~cap->implemented); /* don't let check_caps skip sending a response to MDS for revoke msgs */ - if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { + if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) { cap->mds_wanted = 0; + flags |= CHECK_CAPS_FLUSH_FORCE; if (cap == ci->i_auth_cap) check_caps = 1; /* check auth cap only */ else @@ -3798,9 +3811,9 @@ static void handle_cap_grant(struct inode *inode, mutex_unlock(&session->s_mutex); if (check_caps == 1) - ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL); else if (check_caps == 2) - ceph_check_caps(ci, CHECK_CAPS_NOINVAL); + ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL); } /* diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 8f8de8f33abbfb5e1cc3f9114cfd7a74345e89a9..71cd70514efa5562bbee937b176f6431abd59363 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -577,8 +577,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags); spin_lock_init(&ci->i_ceph_lock); diff --git a/fs/ceph/super.h b/fs/ceph/super.h index b63b4cd9b5b685a930bc33673f28e7c48a93d605..6e817bf1337c6539ef7ccf3341f7e633fcbf93bd 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -200,9 +200,10 @@ struct ceph_cap { struct list_head caps_item; }; -#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ -#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ -#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */ +#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */ +#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */ +#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */ struct ceph_cap_flush { u64 tid; diff --git a/fs/erofs/dir.c b/fs/erofs/dir.c index 2193a6710c8f666705a0ac72849fb62768dbca4a..c3b90abdee37af537514449a6b25baae314e04eb 100644 --- a/fs/erofs/dir.c +++ b/fs/erofs/dir.c @@ -8,19 +8,15 @@ static int erofs_fill_dentries(struct inode *dir, struct dir_context *ctx, void *dentry_blk, struct erofs_dirent *de, - unsigned int nameoff, unsigned int maxsize) + unsigned int nameoff0, unsigned int maxsize) { - const struct erofs_dirent *end = dentry_blk + nameoff; + const struct erofs_dirent *end = dentry_blk + nameoff0; while (de < end) { - const char *de_name; + unsigned char d_type = fs_ftype_to_dtype(de->file_type); + unsigned int nameoff = le16_to_cpu(de->nameoff); + const char *de_name = (char *)dentry_blk + nameoff; unsigned int de_namelen; - unsigned char d_type; - - d_type = fs_ftype_to_dtype(de->file_type); - - nameoff = le16_to_cpu(de->nameoff); - de_name = (char *)dentry_blk + nameoff; /* the last dirent in the block? */ if (de + 1 >= end) @@ -52,21 +48,20 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct super_block *sb = dir->i_sb; unsigned long bsz = sb->s_blocksize; - const size_t dirsize = i_size_read(dir); - unsigned int i = erofs_blknr(sb, ctx->pos); unsigned int ofs = erofs_blkoff(sb, ctx->pos); int err = 0; bool initial = true; buf.mapping = dir->i_mapping; - while (ctx->pos < dirsize) { + while (ctx->pos < dir->i_size) { + erofs_off_t dbstart = ctx->pos - ofs; struct erofs_dirent *de; unsigned int nameoff, maxsize; - de = erofs_bread(&buf, erofs_pos(sb, i), EROFS_KMAP); + de = erofs_bread(&buf, dbstart, EROFS_KMAP); if (IS_ERR(de)) { erofs_err(sb, "fail to readdir of logical block %u of nid %llu", - i, EROFS_I(dir)->nid); + erofs_blknr(sb, dbstart), EROFS_I(dir)->nid); err = PTR_ERR(de); break; } @@ -79,25 +74,19 @@ static int erofs_readdir(struct file *f, struct dir_context *ctx) break; } - maxsize = min_t(unsigned int, dirsize - ctx->pos + ofs, bsz); - + maxsize = min_t(unsigned int, dir->i_size - dbstart, bsz); /* search dirents at the arbitrary position */ if (initial) { initial = false; - ofs = roundup(ofs, sizeof(struct erofs_dirent)); - ctx->pos = erofs_pos(sb, i) + ofs; - if (ofs >= nameoff) - goto skip_this; + ctx->pos = dbstart + ofs; } err = erofs_fill_dentries(dir, ctx, de, (void *)de + ofs, nameoff, maxsize); if (err) break; -skip_this: - ctx->pos = erofs_pos(sb, i) + maxsize; - ++i; + ctx->pos = dbstart + maxsize; ofs = 0; } erofs_put_metabuf(&buf); diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c index 43c09aae2afcd19aa01c0380f06120535c8d78c7..419432be3223b72bb4cf7d3da550697c06ae9306 100644 --- a/fs/erofs/inode.c +++ b/fs/erofs/inode.c @@ -257,25 +257,23 @@ static int erofs_fill_inode(struct inode *inode) goto out_unlock; } + mapping_set_large_folios(inode->i_mapping); if (erofs_inode_is_data_compressed(vi->datalayout)) { #ifdef CONFIG_EROFS_FS_ZIP DO_ONCE_LITE_IF(inode->i_blkbits != PAGE_SHIFT, erofs_info, inode->i_sb, "EXPERIMENTAL EROFS subpage compressed block support in use. Use at your own risk!"); inode->i_mapping->a_ops = &z_erofs_aops; - err = 0; - goto out_unlock; -#endif +#else err = -EOPNOTSUPP; - goto out_unlock; - } - inode->i_mapping->a_ops = &erofs_raw_access_aops; - mapping_set_large_folios(inode->i_mapping); +#endif + } else { + inode->i_mapping->a_ops = &erofs_raw_access_aops; #ifdef CONFIG_EROFS_FS_ONDEMAND - if (erofs_is_fscache_mode(inode->i_sb)) - inode->i_mapping->a_ops = &erofs_fscache_access_aops; + if (erofs_is_fscache_mode(inode->i_sb)) + inode->i_mapping->a_ops = &erofs_fscache_access_aops; #endif - + } out_unlock: erofs_put_metabuf(&buf); return err; diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 736607675396e8c340bfcde2ae4432167870a3b3..45dc15ebd870ddf2a753fe10b4242d4a3ec5e69c 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -220,7 +220,7 @@ struct erofs_buf { }; #define __EROFS_BUF_INITIALIZER ((struct erofs_buf){ .page = NULL }) -#define erofs_blknr(sb, addr) ((addr) >> (sb)->s_blocksize_bits) +#define erofs_blknr(sb, addr) ((erofs_blk_t)((addr) >> (sb)->s_blocksize_bits)) #define erofs_blkoff(sb, addr) ((addr) & ((sb)->s_blocksize - 1)) #define erofs_pos(sb, blk) ((erofs_off_t)(blk) << (sb)->s_blocksize_bits) #define erofs_iblks(i) (round_up((i)->i_size, i_blocksize(i)) >> (i)->i_blkbits) diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 32ce5b35e1dff8eb25c199b09f48e1b7797457ab..6cb5c8916174be4ead0500d421da82df985cf5f6 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -108,22 +108,6 @@ static void erofs_free_inode(struct inode *inode) kmem_cache_free(erofs_inode_cachep, vi); } -static bool check_layout_compatibility(struct super_block *sb, - struct erofs_super_block *dsb) -{ - const unsigned int feature = le32_to_cpu(dsb->feature_incompat); - - EROFS_SB(sb)->feature_incompat = feature; - - /* check if current kernel meets all mandatory requirements */ - if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { - erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", - feature & ~EROFS_ALL_FEATURE_INCOMPAT); - return false; - } - return true; -} - /* read variable-sized metadata, offset will be aligned by 4-byte */ void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, erofs_off_t *offset, int *lengthp) @@ -279,7 +263,7 @@ static int erofs_scan_devices(struct super_block *sb, static int erofs_read_superblock(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); struct erofs_buf buf = __EROFS_BUF_INITIALIZER; struct erofs_super_block *dsb; void *data; @@ -291,9 +275,7 @@ static int erofs_read_superblock(struct super_block *sb) return PTR_ERR(data); } - sbi = EROFS_SB(sb); dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); - ret = -EINVAL; if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { erofs_err(sb, "cannot find valid erofs superblock"); @@ -318,8 +300,12 @@ static int erofs_read_superblock(struct super_block *sb) } ret = -EINVAL; - if (!check_layout_compatibility(sb, dsb)) + sbi->feature_incompat = le32_to_cpu(dsb->feature_incompat); + if (sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT) { + erofs_err(sb, "unidentified incompatible feature %x, please upgrade kernel", + sbi->feature_incompat & ~EROFS_ALL_FEATURE_INCOMPAT); goto out; + } sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; if (sbi->sb_size > PAGE_SIZE - EROFS_SUPER_OFFSET) { diff --git a/fs/erofs/zutil.c b/fs/erofs/zutil.c index 9b53883e5caf8acb5183b1d2e93cda3cf6b27fce..37afe202484091a90a90634e525954eb601b59e1 100644 --- a/fs/erofs/zutil.c +++ b/fs/erofs/zutil.c @@ -111,7 +111,8 @@ int z_erofs_gbuf_growsize(unsigned int nrpages) out: if (i < z_erofs_gbuf_count && tmp_pages) { for (j = 0; j < nrpages; ++j) - if (tmp_pages[j] && tmp_pages[j] != gbuf->pages[j]) + if (tmp_pages[j] && (j >= gbuf->nrpages || + tmp_pages[j] != gbuf->pages[j])) __free_page(tmp_pages[j]); kfree(tmp_pages); } diff --git a/fs/exec.c b/fs/exec.c index a126e3d1cacb01174fec72d4211bff2a4cd9f52a..50e76cc633c4baec0aa90240436c46f63c9a0e32 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1692,6 +1692,7 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) unsigned int mode; vfsuid_t vfsuid; vfsgid_t vfsgid; + int err; if (!mnt_may_suid(file->f_path.mnt)) return; @@ -1708,12 +1709,17 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file) /* Be careful if suid/sgid is set */ inode_lock(inode); - /* reload atomically mode/uid/gid now that lock held */ + /* Atomically reload and check mode/uid/gid now that lock held. */ mode = inode->i_mode; vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); + err = inode_permission(idmap, inode, MAY_EXEC); inode_unlock(inode); + /* Did the exec bit vanish out from under us? Give up. */ + if (err) + return; + /* We ignore suid/sgid if there are no mappings for them in the ns */ if (!vfsuid_has_mapping(bprm->cred->user_ns, vfsuid) || !vfsgid_has_mapping(bprm->cred->user_ns, vfsgid)) diff --git a/fs/file.c b/fs/file.c index a3b72aa64f1164380fdaa6524081a4ac956831da..655338effe9c725574cf19e737a9384e7a460356 100644 --- a/fs/file.c +++ b/fs/file.c @@ -46,27 +46,23 @@ static void free_fdtable_rcu(struct rcu_head *rcu) #define BITBIT_NR(nr) BITS_TO_LONGS(BITS_TO_LONGS(nr)) #define BITBIT_SIZE(nr) (BITBIT_NR(nr) * sizeof(long)) +#define fdt_words(fdt) ((fdt)->max_fds / BITS_PER_LONG) // words in ->open_fds /* * Copy 'count' fd bits from the old table to the new table and clear the extra * space if any. This does not copy the file pointers. Called with the files * spinlock held for write. */ -static void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, - unsigned int count) +static inline void copy_fd_bitmaps(struct fdtable *nfdt, struct fdtable *ofdt, + unsigned int copy_words) { - unsigned int cpy, set; - - cpy = count / BITS_PER_BYTE; - set = (nfdt->max_fds - count) / BITS_PER_BYTE; - memcpy(nfdt->open_fds, ofdt->open_fds, cpy); - memset((char *)nfdt->open_fds + cpy, 0, set); - memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy); - memset((char *)nfdt->close_on_exec + cpy, 0, set); - - cpy = BITBIT_SIZE(count); - set = BITBIT_SIZE(nfdt->max_fds) - cpy; - memcpy(nfdt->full_fds_bits, ofdt->full_fds_bits, cpy); - memset((char *)nfdt->full_fds_bits + cpy, 0, set); + unsigned int nwords = fdt_words(nfdt); + + bitmap_copy_and_extend(nfdt->open_fds, ofdt->open_fds, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->close_on_exec, ofdt->close_on_exec, + copy_words * BITS_PER_LONG, nwords * BITS_PER_LONG); + bitmap_copy_and_extend(nfdt->full_fds_bits, ofdt->full_fds_bits, + copy_words, nwords); } /* @@ -84,7 +80,7 @@ static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt) memcpy(nfdt->fd, ofdt->fd, cpy); memset((char *)nfdt->fd + cpy, 0, set); - copy_fd_bitmaps(nfdt, ofdt, ofdt->max_fds); + copy_fd_bitmaps(nfdt, ofdt, fdt_words(ofdt)); } /* @@ -379,7 +375,7 @@ struct files_struct *dup_fd(struct files_struct *oldf, unsigned int max_fds, int open_files = sane_fdtable_size(old_fdt, max_fds); } - copy_fd_bitmaps(new_fdt, old_fdt, open_files); + copy_fd_bitmaps(new_fdt, old_fdt, open_files / BITS_PER_LONG); old_fds = old_fdt->fd; new_fds = new_fdt->fd; @@ -1248,6 +1244,7 @@ __releases(&files->file_lock) * tables and this condition does not arise without those. */ fdt = files_fdtable(files); + fd = array_index_nospec(fd, fdt->max_fds); tofree = fdt->fd[fd]; if (!tofree && fd_is_open(fd, fdt)) goto Ebusy; diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 9eb191b5c4de124b3b469f5487beebbaf7630eb3..7146038b2fe7d69d2dbd57d80a100e5822a5d6e9 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1618,9 +1618,11 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size, this_num = min_t(unsigned, num, PAGE_SIZE - offset); err = fuse_copy_page(cs, &page, offset, this_num, 0); - if (!err && offset == 0 && - (this_num == PAGE_SIZE || file_size == end)) + if (!PageUptodate(page) && !err && offset == 0 && + (this_num == PAGE_SIZE || file_size == end)) { + zero_user_segment(page, this_num, PAGE_SIZE); SetPageUptodate(page); + } unlock_page(page); put_page(page); diff --git a/fs/inode.c b/fs/inode.c index 86670941884b48737acdfb46858d4611b0592cc0..10c4619faeef8cb81d84a91ec2d982d5a1a51a5c 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -488,6 +488,39 @@ static void inode_lru_list_del(struct inode *inode) this_cpu_dec(nr_unused); } +static void inode_pin_lru_isolating(struct inode *inode) +{ + lockdep_assert_held(&inode->i_lock); + WARN_ON(inode->i_state & (I_LRU_ISOLATING | I_FREEING | I_WILL_FREE)); + inode->i_state |= I_LRU_ISOLATING; +} + +static void inode_unpin_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); + inode->i_state &= ~I_LRU_ISOLATING; + smp_mb(); + wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); +} + +static void inode_wait_for_lru_isolating(struct inode *inode) +{ + spin_lock(&inode->i_lock); + if (inode->i_state & I_LRU_ISOLATING) { + DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); + spin_unlock(&inode->i_lock); + __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); + spin_lock(&inode->i_lock); + WARN_ON(inode->i_state & I_LRU_ISOLATING); + } + spin_unlock(&inode->i_lock); +} + /** * inode_sb_list_add - add inode to the superblock list of inodes * @inode: inode to add @@ -657,6 +690,8 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + inode_wait_for_lru_isolating(inode); + /* * Wait for flusher thread to be done with the inode so that filesystem * does not start destroying it while writeback is still running. Since @@ -855,7 +890,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, * be under pressure before the cache inside the highmem zone. */ if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) { - __iget(inode); + inode_pin_lru_isolating(inode); spin_unlock(&inode->i_lock); spin_unlock(lru_lock); if (remove_inode_buffers(inode)) { @@ -867,7 +902,7 @@ static enum lru_status inode_lru_isolate(struct list_head *item, __count_vm_events(PGINODESTEAL, reap); mm_account_reclaimed_pages(reap); } - iput(inode); + inode_unpin_lru_isolating(inode); spin_lock(lru_lock); return LRU_RETRY; } diff --git a/fs/libfs.c b/fs/libfs.c index 8aa34870449fe18ce5d44ec525dd8e666de95a23..02602d00939e9688250770ae5195cab2cb5b2663 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -450,6 +450,14 @@ void simple_offset_destroy(struct offset_ctx *octx) mtree_destroy(&octx->mt); } +static int offset_dir_open(struct inode *inode, struct file *file) +{ + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + + file->private_data = (void *)ctx->next_offset; + return 0; +} + /** * offset_dir_llseek - Advance the read position of a directory descriptor * @file: an open directory whose position is to be updated @@ -463,6 +471,9 @@ void simple_offset_destroy(struct offset_ctx *octx) */ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) { + struct inode *inode = file->f_inode; + struct offset_ctx *ctx = inode->i_op->get_offset_ctx(inode); + switch (whence) { case SEEK_CUR: offset += file->f_pos; @@ -476,7 +487,8 @@ static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence) } /* In this case, ->private_data is protected by f_pos_lock */ - file->private_data = NULL; + if (!offset) + file->private_data = (void *)ctx->next_offset; return vfs_setpos(file, offset, LONG_MAX); } @@ -507,7 +519,7 @@ static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry) inode->i_ino, fs_umode_to_dtype(inode->i_mode)); } -static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) +static void offset_iterate_dir(struct inode *inode, struct dir_context *ctx, long last_index) { struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode); struct dentry *dentry; @@ -515,17 +527,21 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) while (true) { dentry = offset_find_next(octx, ctx->pos); if (!dentry) - return ERR_PTR(-ENOENT); + return; + + if (dentry2offset(dentry) >= last_index) { + dput(dentry); + return; + } if (!offset_dir_emit(ctx, dentry)) { dput(dentry); - break; + return; } ctx->pos = dentry2offset(dentry) + 1; dput(dentry); } - return NULL; } /** @@ -552,22 +568,19 @@ static void *offset_iterate_dir(struct inode *inode, struct dir_context *ctx) static int offset_readdir(struct file *file, struct dir_context *ctx) { struct dentry *dir = file->f_path.dentry; + long last_index = (long)file->private_data; lockdep_assert_held(&d_inode(dir)->i_rwsem); if (!dir_emit_dots(file, ctx)) return 0; - /* In this case, ->private_data is protected by f_pos_lock */ - if (ctx->pos == DIR_OFFSET_MIN) - file->private_data = NULL; - else if (file->private_data == ERR_PTR(-ENOENT)) - return 0; - file->private_data = offset_iterate_dir(d_inode(dir), ctx); + offset_iterate_dir(d_inode(dir), ctx, last_index); return 0; } const struct file_operations simple_offset_dir_operations = { + .open = offset_dir_open, .llseek = offset_dir_llseek, .iterate_shared = offset_readdir, .read = generic_read_dir, diff --git a/fs/locks.c b/fs/locks.c index 9afb16e0683ffd0ff0430849686dc6ea8b8640f9..e45cad40f8b6bd0ffe007c706eac989082307fae 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2984,7 +2984,7 @@ static int __init filelock_init(void) filelock_cache = kmem_cache_create("file_lock_cache", sizeof(struct file_lock), 0, SLAB_PANIC, NULL); - filelease_cache = kmem_cache_create("file_lock_cache", + filelease_cache = kmem_cache_create("file_lease_cache", sizeof(struct file_lease), 0, SLAB_PANIC, NULL); for_each_possible_cpu(i) { diff --git a/fs/netfs/Kconfig b/fs/netfs/Kconfig index 1b78e8b65ebc142d0ff226ec1f65a37d6b3349c6..7701c037c3283f27e830a49483ce377c8a802538 100644 --- a/fs/netfs/Kconfig +++ b/fs/netfs/Kconfig @@ -24,7 +24,7 @@ config NETFS_STATS config NETFS_DEBUG bool "Enable dynamic debugging netfslib and FS-Cache" - depends on NETFS + depends on NETFS_SUPPORT help This permits debugging to be dynamically enabled in the local caching management module. If this is set, the debugging output may be diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a688d4c75d9926864ae80d2d26a7e1ed6a34ac3d..27c750d39476260bc971591a4735748ba2c019e6 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -9,6 +9,97 @@ #include #include "internal.h" +/* + * [DEPRECATED] Unlock the folios in a read operation for when the filesystem + * is using PG_private_2 and direct writing to the cache from here rather than + * marking the page for writeback. + * + * Note that we don't touch folio->private in this code. + */ +static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq, + size_t *account) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t start_page = rreq->start / PAGE_SIZE; + pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; + bool subreq_failed = false; + + XA_STATE(xas, &rreq->mapping->i_pages, start_page); + + /* Walk through the pagecache and the I/O request lists simultaneously. + * We may have a mixture of cached and uncached sections and we only + * really want to write out the uncached sections. This is slightly + * complicated by the possibility that we might have huge pages with a + * mixture inside. + */ + subreq = list_first_entry(&rreq->subrequests, + struct netfs_io_subrequest, rreq_link); + subreq_failed = (subreq->error < 0); + + trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2); + + rcu_read_lock(); + xas_for_each(&xas, folio, last_page) { + loff_t pg_end; + bool pg_failed = false; + bool folio_started = false; + + if (xas_retry(&xas, folio)) + continue; + + pg_end = folio_pos(folio) + folio_size(folio) - 1; + + for (;;) { + loff_t sreq_end; + + if (!subreq) { + pg_failed = true; + break; + } + + if (!folio_started && + test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) && + fscache_operation_valid(&rreq->cache_resources)) { + trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); + folio_start_private_2(folio); + folio_started = true; + } + + pg_failed |= subreq_failed; + sreq_end = subreq->start + subreq->len - 1; + if (pg_end < sreq_end) + break; + + *account += subreq->transferred; + if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + subreq = list_next_entry(subreq, rreq_link); + subreq_failed = (subreq->error < 0); + } else { + subreq = NULL; + subreq_failed = false; + } + + if (pg_end == sreq_end) + break; + } + + if (!pg_failed) { + flush_dcache_folio(folio); + folio_mark_uptodate(folio); + } + + if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { + if (folio->index == rreq->no_unlock_folio && + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) + _debug("no unlock"); + else + folio_unlock(folio); + } + } + rcu_read_unlock(); +} + /* * Unlock the folios in a read operation. We need to set PG_writeback on any * folios we're going to write back before we unlock them. @@ -35,6 +126,12 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } } + /* Handle deprecated PG_private_2 case. */ + if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { + netfs_rreq_unlock_folios_pgpriv2(rreq, &account); + goto out; + } + /* Walk through the pagecache and the I/O request lists simultaneously. * We may have a mixture of cached and uncached sections and we only * really want to write out the uncached sections. This is slightly @@ -52,7 +149,6 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) loff_t pg_end; bool pg_failed = false; bool wback_to_cache = false; - bool folio_started = false; if (xas_retry(&xas, folio)) continue; @@ -66,17 +162,8 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) pg_failed = true; break; } - if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) { - if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, - &subreq->flags)) { - trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache); - folio_start_private_2(folio); - folio_started = true; - } - } else { - wback_to_cache |= - test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); - } + + wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); pg_failed |= subreq_failed; sreq_end = subreq->start + subreq->len - 1; if (pg_end < sreq_end) @@ -124,6 +211,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } rcu_read_unlock(); +out: task_io_account_read(account); if (rreq->netfs_ops->done) rreq->netfs_ops->done(rreq); @@ -395,7 +483,7 @@ zero_out: } /** - * netfs_write_begin - Helper to prepare for writing + * netfs_write_begin - Helper to prepare for writing [DEPRECATED] * @ctx: The netfs context * @file: The file to read from * @mapping: The mapping to read from @@ -426,6 +514,9 @@ zero_out: * inode before calling this. * * This is usable whether or not caching is enabled. + * + * Note that this should be considered deprecated and netfs_perform_write() + * used instead. */ int netfs_write_begin(struct netfs_inode *ctx, struct file *file, struct address_space *mapping, @@ -466,7 +557,7 @@ retry: if (!netfs_is_cache_enabled(ctx) && netfs_skip_folio_read(folio, pos, len, false)) { netfs_stat(&netfs_n_rh_write_zskip); - goto have_folio; + goto have_folio_no_wait; } rreq = netfs_alloc_request(mapping, file, @@ -507,6 +598,10 @@ retry: netfs_put_request(rreq, false, netfs_rreq_trace_put_return); have_folio: + ret = folio_wait_private_2_killable(folio); + if (ret < 0) + goto error; +have_folio_no_wait: *_folio = folio; _leave(" = 0"); return 0; diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 4726c315453c3a36007df96826414ec2757467ba..ca53c5d1622edecf633aea8f174b96e87351f370 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -184,7 +184,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0; ssize_t written = 0, ret, ret2; loff_t i_size, pos = iocb->ki_pos, from, to; - size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; + size_t max_chunk = mapping_max_folio_size(mapping); bool maybe_trouble = false; if (unlikely(test_bit(NETFS_ICTX_WRITETHROUGH, &ctx->flags) || diff --git a/fs/netfs/fscache_cookie.c b/fs/netfs/fscache_cookie.c index bce2492186d0b44ef5a69733b690b3c9e18beb21..d4d4b3a8b10603794e57333adfbf66562b491639 100644 --- a/fs/netfs/fscache_cookie.c +++ b/fs/netfs/fscache_cookie.c @@ -741,6 +741,10 @@ again_locked: spin_lock(&cookie->lock); } if (test_bit(FSCACHE_COOKIE_DO_LRU_DISCARD, &cookie->flags)) { + if (atomic_read(&cookie->n_accesses) != 0) + /* still being accessed: postpone it */ + break; + __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_LRU_DISCARDING); wake = true; diff --git a/fs/netfs/io.c b/fs/netfs/io.c index c93851b9836889e87257cd058eec8926201738b2..5367caf3fa28630cedf8364ac63ec3d1db7699ad 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -98,6 +98,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async) netfs_put_request(rreq, was_async, netfs_rreq_trace_put_complete); } +/* + * [DEPRECATED] Deal with the completion of writing the data to the cache. We + * have to clear the PG_fscache bits on the folios involved and release the + * caller's ref. + * + * May be called in softirq mode and we inherit a ref from the caller. + */ +static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, + bool was_async) +{ + struct netfs_io_subrequest *subreq; + struct folio *folio; + pgoff_t unlocked = 0; + bool have_unlocked = false; + + rcu_read_lock(); + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE); + + xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) { + if (xas_retry(&xas, folio)) + continue; + + /* We might have multiple writes from the same huge + * folio, but we mustn't unlock a folio more than once. + */ + if (have_unlocked && folio->index <= unlocked) + continue; + unlocked = folio_next_index(folio) - 1; + trace_netfs_folio(folio, netfs_folio_trace_end_copy); + folio_end_private_2(folio); + have_unlocked = true; + } + } + + rcu_read_unlock(); + netfs_rreq_completed(rreq, was_async); +} + +static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error, + bool was_async) /* [DEPRECATED] */ +{ + struct netfs_io_subrequest *subreq = priv; + struct netfs_io_request *rreq = subreq->rreq; + + if (IS_ERR_VALUE(transferred_or_error)) { + netfs_stat(&netfs_n_rh_write_failed); + trace_netfs_failure(rreq, subreq, transferred_or_error, + netfs_fail_copy_to_cache); + } else { + netfs_stat(&netfs_n_rh_write_done); + } + + trace_netfs_sreq(subreq, netfs_sreq_trace_write_term); + + /* If we decrement nr_copy_ops to 0, the ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, was_async); + + netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated); +} + +/* + * [DEPRECATED] Perform any outstanding writes to the cache. We inherit a ref + * from the caller. + */ +static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq) +{ + struct netfs_cache_resources *cres = &rreq->cache_resources; + struct netfs_io_subrequest *subreq, *next, *p; + struct iov_iter iter; + int ret; + + trace_netfs_rreq(rreq, netfs_rreq_trace_copy); + + /* We don't want terminating writes trying to wake us up whilst we're + * still going through the list. + */ + atomic_inc(&rreq->nr_copy_ops); + + list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) { + if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) { + list_del_init(&subreq->rreq_link); + netfs_put_subrequest(subreq, false, + netfs_sreq_trace_put_no_copy); + } + } + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + /* Amalgamate adjacent writes */ + while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { + next = list_next_entry(subreq, rreq_link); + if (next->start != subreq->start + subreq->len) + break; + subreq->len += next->len; + list_del_init(&next->rreq_link); + netfs_put_subrequest(next, false, + netfs_sreq_trace_put_merged); + } + + ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len, + subreq->len, rreq->i_size, true); + if (ret < 0) { + trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write); + trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip); + continue; + } + + iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages, + subreq->start, subreq->len); + + atomic_inc(&rreq->nr_copy_ops); + netfs_stat(&netfs_n_rh_write); + netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache); + trace_netfs_sreq(subreq, netfs_sreq_trace_write); + cres->ops->write(cres, subreq->start, &iter, + netfs_rreq_copy_terminated, subreq); + } + + /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */ + if (atomic_dec_and_test(&rreq->nr_copy_ops)) + netfs_rreq_unmark_after_write(rreq, false); +} + +static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */ +{ + struct netfs_io_request *rreq = + container_of(work, struct netfs_io_request, work); + + netfs_rreq_do_write_to_cache(rreq); +} + +static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */ +{ + rreq->work.func = netfs_rreq_write_to_cache_work; + if (!queue_work(system_unbound_wq, &rreq->work)) + BUG(); +} + /* * Handle a short read. */ @@ -275,6 +415,10 @@ again: clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags); wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS); + if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) && + test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) + return netfs_rreq_write_to_cache(rreq); + netfs_rreq_completed(rreq, was_async); } @@ -386,7 +530,8 @@ incomplete: if (transferred_or_error == 0) { if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) { - subreq->error = -ENODATA; + if (rreq->origin != NETFS_DIO_READ) + subreq->error = -ENODATA; goto failed; } } else { @@ -457,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, } if (subreq->len > ictx->zero_point - subreq->start) subreq->len = ictx->zero_point - subreq->start; + + /* We limit buffered reads to the EOF, but let the + * server deal with larger-than-EOF DIO/unbuffered + * reads. + */ + if (subreq->len > rreq->i_size - subreq->start) + subreq->len = rreq->i_size - subreq->start; } - if (subreq->len > rreq->i_size - subreq->start) - subreq->len = rreq->i_size - subreq->start; if (rreq->rsize && subreq->len > rreq->rsize) subreq->len = rreq->rsize; @@ -595,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) do { _debug("submit %llx + %llx >= %llx", rreq->start, rreq->submitted, rreq->i_size); - if (rreq->origin == NETFS_DIO_READ && - rreq->start + rreq->submitted >= rreq->i_size) - break; if (!netfs_rreq_submit_slice(rreq, &io_iter)) break; + if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags)) + break; if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) && test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags)) break; diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c index f4a642727479252de0212f80fe32ef23468e0ebf..0294df70c3ff4563e023be18421cb85a8cca4370 100644 --- a/fs/netfs/objects.c +++ b/fs/netfs/objects.c @@ -24,10 +24,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, struct netfs_io_request *rreq; mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool; struct kmem_cache *cache = mempool->pool_data; - bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE || - origin == NETFS_DIO_READ || - origin == NETFS_DIO_WRITE); - bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx); int ret; for (;;) { @@ -56,12 +52,6 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping, refcount_set(&rreq->ref, 1); __set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags); - if (cached) { - __set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags); - if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags)) - /* Filesystem uses deprecated PG_private_2 marking. */ - __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); - } if (file && file->f_flags & O_NONBLOCK) __set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags); if (rreq->netfs_ops->init_request) { diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 9258d30cffe3cf4275b20cd0944a817c238292b9..3f7e37e50c7d02456af24ae7cbdc9429d6875580 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -94,6 +94,8 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, { struct netfs_io_request *wreq; struct netfs_inode *ictx; + bool is_buffered = (origin == NETFS_WRITEBACK || + origin == NETFS_WRITETHROUGH); wreq = netfs_alloc_request(mapping, file, start, 0, origin); if (IS_ERR(wreq)) @@ -102,7 +104,7 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, _enter("R=%x", wreq->debug_id); ictx = netfs_inode(wreq->inode); - if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags)) + if (is_buffered && netfs_is_cache_enabled(ictx)) fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx)); wreq->contiguity = wreq->start; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 29c49a7e5fe1c10aca0b700d47adfa49c8ffbd86..6df77f008d3fada0066275fda0625e6c51368a56 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -118,7 +118,9 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap) if (likely(attrlen > 0)) bitmap[0] = ntohl(*p++); if (attrlen > 1) - bitmap[1] = ntohl(*p); + bitmap[1] = ntohl(*p++); + if (attrlen > 2) + bitmap[2] = ntohl(*p); return 0; } @@ -446,7 +448,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp, void *argp) { struct cb_recallanyargs *args = argp; - uint32_t bitmap[2]; + uint32_t bitmap[3]; __be32 *p, status; p = xdr_inline_decode(xdr, 4); diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index d5edb3b3eeef0d5e1b1aaba7817d5958e9155a5c..20cb2008f9e4696b05284eab6a0fa578eaa39626 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -647,6 +647,9 @@ restart: prev = delegation; continue; } + inode = nfs_delegation_grab_inode(delegation); + if (inode == NULL) + continue; if (prev) { struct inode *tmp = nfs_delegation_grab_inode(prev); @@ -657,12 +660,6 @@ restart: } } - inode = nfs_delegation_grab_inode(delegation); - if (inode == NULL) { - rcu_read_unlock(); - iput(to_put); - goto restart; - } delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); @@ -1184,7 +1181,6 @@ static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server, struct inode *inode; restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1195,7 +1191,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; delegation = nfs_start_delegation_return_locked(NFS_I(inode)); rcu_read_unlock(); if (delegation != NULL) { @@ -1318,7 +1314,6 @@ static int nfs_server_reap_expired_delegations(struct nfs_server *server, restart: rcu_read_lock(); -restart_locked: list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags) || @@ -1330,7 +1325,7 @@ restart_locked: continue; inode = nfs_delegation_grab_inode(delegation); if (inode == NULL) - goto restart_locked; + continue; spin_lock(&delegation->lock); cred = get_cred_rcu(delegation->cred); nfs4_stateid_copy(&stateid, &delegation->stateid); diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 7202ce84d0eb032784e241fa8a82e79486948d49..7a558dea75c4092663a7a2230b4430c7a89efea0 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -265,6 +265,8 @@ static int nfs_netfs_init_request(struct netfs_io_request *rreq, struct file *fi { rreq->netfs_priv = get_nfs_open_context(nfs_file_open_context(file)); rreq->debug_id = atomic_inc_return(&nfs_netfs_debug_id); + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags); return 0; } @@ -361,7 +363,8 @@ void nfs_netfs_read_completion(struct nfs_pgio_header *hdr) return; sreq = netfs->sreq; - if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) + if (test_bit(NFS_IOHDR_EOF, &hdr->flags) && + sreq->rreq->origin != NETFS_DIO_READ) __set_bit(NETFS_SREQ_CLEAR_TAIL, &sreq->flags); if (hdr->error) diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index fbed0027996f8840ddbe7a16ebe457fa684c525c..e8adae1bc260a31fdab9c3ed91b7e1373609759e 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -81,8 +81,6 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) { netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); - /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ - __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags); } extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8883016c551cec632d22e82f6bd7d4c4408b6c36..b8ffbe52ba15a5c0204d9e7cb62e4d2a414b5d9a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3931,7 +3931,8 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f FATTR4_WORD0_CASE_INSENSITIVE | FATTR4_WORD0_CASE_PRESERVING; if (minorversion) - bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT; + bitmask[2] = FATTR4_WORD2_SUPPATTR_EXCLCREAT | + FATTR4_WORD2_OPEN_ARGUMENTS; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { @@ -9997,6 +9998,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) fallthrough; default: task->tk_status = 0; + lrp->res.lrs_present = 0; fallthrough; case 0: break; @@ -10010,9 +10012,11 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) task->tk_status = 0; break; case -NFS4ERR_DELAY: - if (nfs4_async_handle_error(task, server, NULL, NULL) != -EAGAIN) - break; - goto out_restart; + if (nfs4_async_handle_error(task, server, NULL, NULL) == + -EAGAIN) + goto out_restart; + lrp->res.lrs_present = 0; + break; } return; out_restart: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index aa698481bec8d1544aef72518f50f0e19fb46e27..0d16b383a45262f75f06db6601669b7f55df4fa4 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1284,10 +1284,9 @@ void pnfs_layoutreturn_free_lsegs(struct pnfs_layout_hdr *lo, LIST_HEAD(freeme); spin_lock(&inode->i_lock); - if (!pnfs_layout_is_valid(lo) || - !nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) + if (!nfs4_stateid_match_other(&lo->plh_stateid, arg_stateid)) goto out_unlock; - if (stateid) { + if (stateid && pnfs_layout_is_valid(lo)) { u32 seq = be32_to_cpu(arg_stateid->seqid); pnfs_mark_matching_lsegs_invalid(lo, &freeme, range, seq); diff --git a/fs/nfs/super.c b/fs/nfs/super.c index cbbd4866b0b7a41261042faf1b43344166208978..97b386032b717a83189679ca63f265febe65b643 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -228,6 +229,7 @@ static int __nfs_list_for_each_server(struct list_head *head, ret = fn(server, data); if (ret) goto out; + cond_resched(); rcu_read_lock(); } rcu_read_unlock(); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 9e0ea6fc2aa326c5413d792743a330ee5eec68f9..34eb2c2cbcde34377a347efe7a5e2dbffe552053 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -2069,8 +2069,7 @@ int nfsd_nl_listener_set_doit(struct sk_buff *skb, struct genl_info *info) continue; } - ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, - SVC_SOCK_ANONYMOUS, + ret = svc_xprt_create_from_sa(serv, xcl_name, net, sa, 0, get_current_cred()); /* always save the latest error */ if (ret < 0) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index c71ae5c043060ebf5dd7f6d9e5f63e6e7bcf7841..4a20e92474b234dec8ace6f7e806fb3be62802c3 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -1072,7 +1072,7 @@ static int cifs_security_flags_proc_open(struct inode *inode, struct file *file) static void cifs_security_flags_handle_must_flags(unsigned int *flags) { - unsigned int signflags = *flags & CIFSSEC_MUST_SIGN; + unsigned int signflags = *flags & (CIFSSEC_MUST_SIGN | CIFSSEC_MUST_SEAL); if ((*flags & CIFSSEC_MUST_KRB5) == CIFSSEC_MUST_KRB5) *flags = CIFSSEC_MUST_KRB5; diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 2c4b357d85e22cfc60476a0be6e6bf7b24b826f3..d89485235425aee542cfd503135f8a50f337ec88 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -75,9 +75,9 @@ unsigned int sign_CIFS_PDUs = 1; /* * Global transaction id (XID) information */ -unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ -unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ -unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ +unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ +unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ +unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 62d5fee3e5ebbdefc0de78a5bd20f5d6d0dd51f9..ca2bd204bcc591904c1b32da6c1c096d2384f655 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -147,6 +147,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 49 -#define CIFS_VERSION "2.49" +#define SMB3_PRODUCT_BUILD 50 +#define CIFS_VERSION "2.50" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 8e86fec7dcd2ba6f54167600f651439540942045..7ebe80a25d045765668165bb3bc8347d914e8174 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -345,7 +345,7 @@ struct smb_version_operations { /* connect to a server share */ int (*tree_connect)(const unsigned int, struct cifs_ses *, const char *, struct cifs_tcon *, const struct nls_table *); - /* close tree connecion */ + /* close tree connection */ int (*tree_disconnect)(const unsigned int, struct cifs_tcon *); /* get DFS referrals */ int (*get_dfs_refer)(const unsigned int, struct cifs_ses *, @@ -816,7 +816,7 @@ struct TCP_Server_Info { * Protected by @refpath_lock and @srv_lock. The @refpath_lock is * mostly used for not requiring a copy of @leaf_fullpath when getting * cached or new DFS referrals (which might also sleep during I/O). - * While @srv_lock is held for making string and NULL comparions against + * While @srv_lock is held for making string and NULL comparisons against * both fields as in mount(2) and cache refresh. * * format: \\HOST\SHARE[\OPTIONAL PATH] @@ -1471,29 +1471,6 @@ struct cifs_io_parms { struct TCP_Server_Info *server; }; -struct cifs_aio_ctx { - struct kref refcount; - struct list_head list; - struct mutex aio_mutex; - struct completion done; - struct iov_iter iter; - struct kiocb *iocb; - struct cifsFileInfo *cfile; - struct bio_vec *bv; - loff_t pos; - unsigned int nr_pinned_pages; - ssize_t rc; - unsigned int len; - unsigned int total_len; - unsigned int bv_need_unpin; /* If ->bv[] needs unpinning */ - bool should_dirty; - /* - * Indicates if this aio_ctx is for direct_io, - * If yes, iter is a copy of the user passed iov_iter - */ - bool direct_io; -}; - struct cifs_io_request { struct netfs_io_request rreq; struct cifsFileInfo *cfile; @@ -1904,7 +1881,7 @@ static inline bool is_replayable_error(int error) #define CIFSSEC_MAY_SIGN 0x00001 #define CIFSSEC_MAY_NTLMV2 0x00004 #define CIFSSEC_MAY_KRB5 0x00008 -#define CIFSSEC_MAY_SEAL 0x00040 /* not supported yet */ +#define CIFSSEC_MAY_SEAL 0x00040 #define CIFSSEC_MAY_NTLMSSP 0x00080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_MUST_SIGN 0x01001 @@ -1914,11 +1891,11 @@ require use of the stronger protocol */ #define CIFSSEC_MUST_NTLMV2 0x04004 #define CIFSSEC_MUST_KRB5 0x08008 #ifdef CONFIG_CIFS_UPCALL -#define CIFSSEC_MASK 0x8F08F /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xCF0CF /* flags supported if no weak allowed */ #else -#define CIFSSEC_MASK 0x87087 /* flags supported if no weak allowed */ +#define CIFSSEC_MASK 0xC70C7 /* flags supported if no weak allowed */ #endif /* UPCALL */ -#define CIFSSEC_MUST_SEAL 0x40040 /* not supported yet */ +#define CIFSSEC_MUST_SEAL 0x40040 #define CIFSSEC_MUST_NTLMSSP 0x80080 /* raw ntlmssp with ntlmv2 */ #define CIFSSEC_DEF (CIFSSEC_MAY_SIGN | CIFSSEC_MAY_NTLMV2 | CIFSSEC_MAY_NTLMSSP | CIFSSEC_MAY_SEAL) @@ -2010,7 +1987,6 @@ require use of the stronger protocol */ * cifsFileInfo->file_info_lock cifsFileInfo->count cifs_new_fileinfo * ->invalidHandle initiate_cifs_search * ->oplock_break_cancelled - * cifs_aio_ctx->aio_mutex cifs_aio_ctx cifs_aio_ctx_alloc ****************************************************************************/ #ifdef DECLARE_GLOBALS_HERE @@ -2041,9 +2017,9 @@ extern spinlock_t cifs_tcp_ses_lock; /* * Global transaction id (XID) information */ -extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Sem */ -extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */ -extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Sem */ +extern unsigned int GlobalCurrentXid; /* protected by GlobalMid_Lock */ +extern unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Lock */ +extern unsigned int GlobalMaxActiveXid; /* prot by GlobalMid_Lock */ extern spinlock_t GlobalMid_Lock; /* protects above & list operations on midQ entries */ /* diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h index c15bb5ee7eb7d6787f5138499591071169008bda..497bf3c447bcb59486feed877560bd920d91a70e 100644 --- a/fs/smb/client/cifsproto.h +++ b/fs/smb/client/cifsproto.h @@ -619,8 +619,6 @@ int __cifs_calc_signature(struct smb_rqst *rqst, struct shash_desc *shash); enum securityEnum cifs_select_sectype(struct TCP_Server_Info *, enum securityEnum); -struct cifs_aio_ctx *cifs_aio_ctx_alloc(void); -void cifs_aio_ctx_release(struct kref *refcount); int cifs_alloc_hash(const char *name, struct shash_desc **sdesc); void cifs_free_hash(struct shash_desc **sdesc); diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index d2307162a2de15ec01462d49b8ce3191a0be9f66..c1c14274930ac6826f422fc18cbaec5dab88293b 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -4194,6 +4194,9 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink) * * If one doesn't exist then insert a new tcon_link struct into the tree and * try to construct a new one. + * + * REMEMBER to call cifs_put_tlink() after successful calls to cifs_sb_tlink, + * to avoid refcount issues */ struct tcon_link * cifs_sb_tlink(struct cifs_sb_info *cifs_sb) diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index b2405dd4d4d4da6e515c554b9c3e9649ec59c4cb..f9b302cb8233cfe7f547a813e2133fa34c70aa7b 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -217,7 +217,8 @@ static void cifs_req_issue_read(struct netfs_io_subrequest *subreq) goto out; } - __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); + if (subreq->rreq->origin != NETFS_DIO_READ) + __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags); rc = rdata->server->ops->async_readv(rdata); out: @@ -315,7 +316,7 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) #endif } - if (rdata->credits.value != 0) + if (rdata->credits.value != 0) { trace_smb3_rw_credits(rdata->rreq->debug_id, rdata->subreq.debug_index, rdata->credits.value, @@ -323,8 +324,12 @@ static void cifs_free_subrequest(struct netfs_io_subrequest *subreq) rdata->server ? rdata->server->in_flight : 0, -rdata->credits.value, cifs_trace_rw_credits_free_subreq); + if (rdata->server) + add_credits_and_wake_if(rdata->server, &rdata->credits, 0); + else + rdata->credits.value = 0; + } - add_credits_and_wake_if(rdata->server, &rdata->credits, 0); if (rdata->have_xid) free_xid(rdata->xid); } @@ -2749,6 +2754,7 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) struct inode *inode = file->f_mapping->host; struct cifsInodeInfo *cinode = CIFS_I(inode); struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server; + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); ssize_t rc; rc = netfs_start_io_write(inode); @@ -2765,12 +2771,16 @@ cifs_writev(struct kiocb *iocb, struct iov_iter *from) if (rc <= 0) goto out; - if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) && + (cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from), server->vals->exclusive_lock_type, 0, - NULL, CIFS_WRITE_OP)) - rc = netfs_buffered_write_iter_locked(iocb, from, NULL); - else + NULL, CIFS_WRITE_OP))) { rc = -EACCES; + goto out; + } + + rc = netfs_buffered_write_iter_locked(iocb, from, NULL); + out: up_read(&cinode->lock_sem); netfs_end_io_write(inode); @@ -2902,9 +2912,7 @@ cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) if (!CIFS_CACHE_READ(cinode)) return netfs_unbuffered_read_iter(iocb, to); - if (cap_unix(tcon->ses) && - (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && - ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { + if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0) { if (iocb->ki_flags & IOCB_DIRECT) return netfs_unbuffered_read_iter(iocb, to); return netfs_buffered_read_iter(iocb, to); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 4a8aa1de95223dfc487e577e476be18d677b6477..dd0afa23734c8d7d1aa14a0a6fee7ba2cf9c10c5 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -1042,13 +1042,26 @@ static int reparse_info_to_fattr(struct cifs_open_info_data *data, } rc = -EOPNOTSUPP; - switch ((data->reparse.tag = tag)) { - case 0: /* SMB1 symlink */ + data->reparse.tag = tag; + if (!data->reparse.tag) { if (server->ops->query_symlink) { rc = server->ops->query_symlink(xid, tcon, cifs_sb, full_path, &data->symlink_target); } + if (rc == -EOPNOTSUPP) + data->reparse.tag = IO_REPARSE_TAG_INTERNAL; + } + + switch (data->reparse.tag) { + case 0: /* SMB1 symlink */ + break; + case IO_REPARSE_TAG_INTERNAL: + rc = 0; + if (le32_to_cpu(data->fi.Attributes) & ATTR_DIRECTORY) { + cifs_create_junction_fattr(fattr, sb); + goto out; + } break; case IO_REPARSE_TAG_MOUNT_POINT: cifs_create_junction_fattr(fattr, sb); diff --git a/fs/smb/client/ioctl.c b/fs/smb/client/ioctl.c index 855ac5a62edfaa50215cfed46e361dcb79f0c8fc..9bb5c869f4db7e60ad215967e5cf7721543fee21 100644 --- a/fs/smb/client/ioctl.c +++ b/fs/smb/client/ioctl.c @@ -170,7 +170,10 @@ static long smb_mnt_get_fsinfo(unsigned int xid, struct cifs_tcon *tcon, static int cifs_shutdown(struct super_block *sb, unsigned long arg) { struct cifs_sb_info *sbi = CIFS_SB(sb); + struct tcon_link *tlink; + struct cifs_tcon *tcon; __u32 flags; + int rc; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -178,14 +181,21 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) if (get_user(flags, (__u32 __user *)arg)) return -EFAULT; - if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) - return -EINVAL; + tlink = cifs_sb_tlink(sbi); + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); + + trace_smb3_shutdown_enter(flags, tcon->tid); + if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH) { + rc = -EINVAL; + goto shutdown_out_err; + } if (cifs_forced_shutdown(sbi)) - return 0; + goto shutdown_good; cifs_dbg(VFS, "shut down requested (%d)", flags); -/* trace_cifs_shutdown(sb, flags);*/ /* * see: @@ -201,7 +211,8 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) */ case CIFS_GOING_FLAGS_DEFAULT: cifs_dbg(FYI, "shutdown with default flag not supported\n"); - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; /* * FLAGS_LOGFLUSH is easy since it asks to write out metadata (not * data) but metadata writes are not cached on the client, so can treat @@ -210,11 +221,20 @@ static int cifs_shutdown(struct super_block *sb, unsigned long arg) case CIFS_GOING_FLAGS_LOGFLUSH: case CIFS_GOING_FLAGS_NOLOGFLUSH: sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN; - return 0; + goto shutdown_good; default: - return -EINVAL; + rc = -EINVAL; + goto shutdown_out_err; } + +shutdown_good: + trace_smb3_shutdown_done(flags, tcon->tid); + cifs_put_tlink(tlink); return 0; +shutdown_out_err: + trace_smb3_shutdown_err(rc, flags, tcon->tid); + cifs_put_tlink(tlink); + return rc; } static int cifs_dump_full_key(struct cifs_tcon *tcon, struct smb3_full_key_debug_info __user *in) diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c index d86da949a919056b23b68f400b36933a94e9635f..80099bbb333b0847364161782cd361fe39b9dc91 100644 --- a/fs/smb/client/link.c +++ b/fs/smb/client/link.c @@ -588,6 +588,7 @@ cifs_symlink(struct mnt_idmap *idmap, struct inode *inode, tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { rc = PTR_ERR(tlink); + /* BB could be clearer if skipped put_tlink on error here, but harmless */ goto symlink_exit; } pTcon = tlink_tcon(tlink); diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 07c468ddb88a89d65f8a48433a055759deb3da26..c6f11e6f9eb995b6e809ee18c320d1ec2854f9f8 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -352,7 +352,7 @@ checkSMB(char *buf, unsigned int total_read, struct TCP_Server_Info *server) * on simple responses (wct, bcc both zero) * in particular have seen this on * ulogoffX and FindClose. This leaves - * one byte of bcc potentially unitialized + * one byte of bcc potentially uninitialized */ /* zero rest of bcc */ tmp[sizeof(struct smb_hdr)+1] = 0; @@ -995,60 +995,6 @@ parse_DFS_referrals_exit: return rc; } -struct cifs_aio_ctx * -cifs_aio_ctx_alloc(void) -{ - struct cifs_aio_ctx *ctx; - - /* - * Must use kzalloc to initialize ctx->bv to NULL and ctx->direct_io - * to false so that we know when we have to unreference pages within - * cifs_aio_ctx_release() - */ - ctx = kzalloc(sizeof(struct cifs_aio_ctx), GFP_KERNEL); - if (!ctx) - return NULL; - - INIT_LIST_HEAD(&ctx->list); - mutex_init(&ctx->aio_mutex); - init_completion(&ctx->done); - kref_init(&ctx->refcount); - return ctx; -} - -void -cifs_aio_ctx_release(struct kref *refcount) -{ - struct cifs_aio_ctx *ctx = container_of(refcount, - struct cifs_aio_ctx, refcount); - - cifsFileInfo_put(ctx->cfile); - - /* - * ctx->bv is only set if setup_aio_ctx_iter() was call successfuly - * which means that iov_iter_extract_pages() was a success and thus - * that we may have references or pins on pages that we need to - * release. - */ - if (ctx->bv) { - if (ctx->should_dirty || ctx->bv_need_unpin) { - unsigned int i; - - for (i = 0; i < ctx->nr_pinned_pages; i++) { - struct page *page = ctx->bv[i].bv_page; - - if (ctx->should_dirty) - set_page_dirty(page); - if (ctx->bv_need_unpin) - unpin_user_page(page); - } - } - kvfree(ctx->bv); - } - - kfree(ctx); -} - /** * cifs_alloc_hash - allocate hash and hash context together * @name: The name of the crypto hash algo @@ -1288,6 +1234,7 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, const char *full_path, bool *islink) { + struct TCP_Server_Info *server = tcon->ses->server; struct cifs_ses *ses = tcon->ses; size_t len; char *path; @@ -1304,12 +1251,12 @@ int cifs_inval_name_dfs_link_error(const unsigned int xid, !is_tcon_dfs(tcon)) return 0; - spin_lock(&tcon->tc_lock); - if (!tcon->origin_fullpath) { - spin_unlock(&tcon->tc_lock); + spin_lock(&server->srv_lock); + if (!server->leaf_fullpath) { + spin_unlock(&server->srv_lock); return 0; } - spin_unlock(&tcon->tc_lock); + spin_unlock(&server->srv_lock); /* * Slow path - tcon is DFS and @full_path has prefix path, so attempt diff --git a/fs/smb/client/reparse.c b/fs/smb/client/reparse.c index a0ffbda90733170bf6c80267f29c90e9fb49d62f..48c27581ec511cd4f90b342850fcfd99b2772bff 100644 --- a/fs/smb/client/reparse.c +++ b/fs/smb/client/reparse.c @@ -378,6 +378,8 @@ int parse_reparse_point(struct reparse_data_buffer *buf, u32 plen, struct cifs_sb_info *cifs_sb, bool unicode, struct cifs_open_info_data *data) { + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + data->reparse.buf = buf; /* See MS-FSCC 2.1.2 */ @@ -394,12 +396,13 @@ int parse_reparse_point(struct reparse_data_buffer *buf, case IO_REPARSE_TAG_LX_FIFO: case IO_REPARSE_TAG_LX_CHR: case IO_REPARSE_TAG_LX_BLK: - return 0; + break; default: - cifs_dbg(VFS, "%s: unhandled reparse tag: 0x%08x\n", - __func__, le32_to_cpu(buf->ReparseTag)); - return -EOPNOTSUPP; + cifs_tcon_dbg(VFS | ONCE, "unhandled reparse tag: 0x%08x\n", + le32_to_cpu(buf->ReparseTag)); + break; } + return 0; } int smb2_parse_reparse_point(struct cifs_sb_info *cifs_sb, @@ -505,6 +508,10 @@ bool cifs_reparse_point_to_fattr(struct cifs_sb_info *cifs_sb, } switch (tag) { + case IO_REPARSE_TAG_INTERNAL: + if (!(fattr->cf_cifsattrs & ATTR_DIRECTORY)) + return false; + fallthrough; case IO_REPARSE_TAG_DFS: case IO_REPARSE_TAG_DFSR: case IO_REPARSE_TAG_MOUNT_POINT: diff --git a/fs/smb/client/reparse.h b/fs/smb/client/reparse.h index 6b55d1df9e2f844f04b8469823ca5d370589c871..2c0644bc4e65a7e1acaed6f5415f917d5b8a62d5 100644 --- a/fs/smb/client/reparse.h +++ b/fs/smb/client/reparse.h @@ -12,6 +12,12 @@ #include "fs_context.h" #include "cifsglob.h" +/* + * Used only by cifs.ko to ignore reparse points from files when client or + * server doesn't support FSCTL_GET_REPARSE_POINT. + */ +#define IO_REPARSE_TAG_INTERNAL ((__u32)~0U) + static inline dev_t reparse_nfs_mkdev(struct reparse_posix_data *buf) { u64 v = le64_to_cpu(*(__le64 *)buf->DataBuffer); @@ -78,10 +84,19 @@ static inline u32 reparse_mode_wsl_tag(mode_t mode) static inline bool reparse_inode_match(struct inode *inode, struct cifs_fattr *fattr) { + struct cifsInodeInfo *cinode = CIFS_I(inode); struct timespec64 ctime = inode_get_ctime(inode); - return (CIFS_I(inode)->cifsAttrs & ATTR_REPARSE) && - CIFS_I(inode)->reparse_tag == fattr->cf_cifstag && + /* + * Do not match reparse tags when client or server doesn't support + * FSCTL_GET_REPARSE_POINT. @fattr->cf_cifstag should contain correct + * reparse tag from query dir response but the client won't be able to + * read the reparse point data anyway. This spares us a revalidation. + */ + if (cinode->reparse_tag != IO_REPARSE_TAG_INTERNAL && + cinode->reparse_tag != fattr->cf_cifstag) + return false; + return (cinode->cifsAttrs & ATTR_REPARSE) && timespec64_equal(&ctime, &fattr->cf_ctime); } diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 5c02a12251c8486f55095777eedee868282d1a54..9f5bc41433c15646278d4dc19115c6f0798d5c14 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -930,6 +930,8 @@ int smb2_query_path_info(const unsigned int xid, switch (rc) { case 0: + rc = parse_create_response(data, cifs_sb, &out_iov[0]); + break; case -EOPNOTSUPP: /* * BB TODO: When support for special files added to Samba @@ -948,7 +950,8 @@ int smb2_query_path_info(const unsigned int xid, cmds[num_cmds++] = SMB2_OP_GET_REPARSE; oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, - FILE_READ_ATTRIBUTES | FILE_READ_EA, + FILE_READ_ATTRIBUTES | + FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, create_options | OPEN_REPARSE_POINT, ACL_NO_MODE); cifs_get_readable_path(tcon, full_path, &cfile); @@ -1256,7 +1259,8 @@ int smb2_query_reparse_point(const unsigned int xid, cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path); cifs_get_readable_path(tcon, full_path, &cfile); - oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_READ_ATTRIBUTES, + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, + FILE_READ_ATTRIBUTES | FILE_READ_EA | SYNCHRONIZE, FILE_OPEN, OPEN_REPARSE_POINT, ACL_NO_MODE); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 9a06b5594669b336a6fc45b068fcc5afab11b531..83facb54276a317b27333200dab66e26d16189fa 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -82,6 +82,9 @@ int smb3_encryption_required(const struct cifs_tcon *tcon) if (tcon->seal && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) return 1; + if (((global_secflags & CIFSSEC_MUST_SEAL) == CIFSSEC_MUST_SEAL) && + (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) + return 1; return 0; } diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index d74e829de51c225a94f8221f03712f9826c030d7..7bcc379014cadd4e20415b6a06914e3ad33744f8 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -406,7 +406,7 @@ static void smbd_post_send_credits(struct work_struct *work) else response = get_empty_queue_buffer(info); if (!response) { - /* now switch to emtpy packet queue */ + /* now switch to empty packet queue */ if (use_receive_queue) { use_receive_queue = 0; continue; @@ -618,7 +618,7 @@ out: /* * Test if FRWR (Fast Registration Work Requests) is supported on the device - * This implementation requries FRWR on RDMA read/write + * This implementation requires FRWR on RDMA read/write * return value: true if it is supported */ static bool frwr_is_supported(struct ib_device_attr *attrs) @@ -2177,7 +2177,7 @@ cleanup_entries: * MR available in the list. It may access the list while the * smbd_mr_recovery_work is recovering the MR list. This doesn't need a lock * as they never modify the same places. However, there may be several CPUs - * issueing I/O trying to get MR at the same time, mr_list_lock is used to + * issuing I/O trying to get MR at the same time, mr_list_lock is used to * protect this situation. */ static struct smbd_mr *get_mr(struct smbd_connection *info) @@ -2311,7 +2311,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* * There is no need for waiting for complemtion on ib_post_send * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution - * on the next ib_post_send when we actaully send I/O to remote peer + * on the next ib_post_send when we actually send I/O to remote peer */ rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); if (!rc) diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index 6b3bdfb97211ff1547a0729518ac94553d4239b7..0f0c10c7ada73f1ae68c8ef929ed3696679705ac 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -1388,7 +1388,7 @@ DECLARE_EVENT_CLASS(smb3_ioctl_class, __entry->command = command; ), TP_printk("xid=%u fid=0x%llx ioctl cmd=0x%x", - __entry->xid, __entry->fid, __entry->command) + __entry->xid, __entry->fid, __entry->command) ) #define DEFINE_SMB3_IOCTL_EVENT(name) \ @@ -1400,9 +1400,58 @@ DEFINE_EVENT(smb3_ioctl_class, smb3_##name, \ DEFINE_SMB3_IOCTL_EVENT(ioctl); +DECLARE_EVENT_CLASS(smb3_shutdown_class, + TP_PROTO(__u32 flags, + __u32 tid), + TP_ARGS(flags, tid), + TP_STRUCT__entry( + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("flags=0x%x tid=0x%x", + __entry->flags, __entry->tid) +) + +#define DEFINE_SMB3_SHUTDOWN_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_class, smb3_##name, \ + TP_PROTO(__u32 flags, \ + __u32 tid), \ + TP_ARGS(flags, tid)) + +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_enter); +DEFINE_SMB3_SHUTDOWN_EVENT(shutdown_done); +DECLARE_EVENT_CLASS(smb3_shutdown_err_class, + TP_PROTO(int rc, + __u32 flags, + __u32 tid), + TP_ARGS(rc, flags, tid), + TP_STRUCT__entry( + __field(int, rc) + __field(__u32, flags) + __field(__u32, tid) + ), + TP_fast_assign( + __entry->rc = rc; + __entry->flags = flags; + __entry->tid = tid; + ), + TP_printk("rc=%d flags=0x%x tid=0x%x", + __entry->rc, __entry->flags, __entry->tid) +) +#define DEFINE_SMB3_SHUTDOWN_ERR_EVENT(name) \ +DEFINE_EVENT(smb3_shutdown_err_class, smb3_##name, \ + TP_PROTO(int rc, \ + __u32 flags, \ + __u32 tid), \ + TP_ARGS(rc, flags, tid)) +DEFINE_SMB3_SHUTDOWN_ERR_EVENT(shutdown_err); DECLARE_EVENT_CLASS(smb3_credit_class, TP_PROTO(__u64 currmid, diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index adfe0d05870108ab639f6cf8d5760d6f525146b6..6e68aaf5bd20357e35b0aa090ca75acc9f3b4017 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1289,7 +1289,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses, out: /* * This will dequeue all mids. After this it is important that the - * demultiplex_thread will not process any of these mids any futher. + * demultiplex_thread will not process any of these mids any further. * This is prevented above by using a noop callback that will not * wake this thread except for the very last PDU. */ diff --git a/fs/smb/common/smb2pdu.h b/fs/smb/common/smb2pdu.h index c3ee42188d252ecb851e3c9fa64a7ba34d4f5278..c769f9dbc0b467929cb9877817d61720a7349d76 100644 --- a/fs/smb/common/smb2pdu.h +++ b/fs/smb/common/smb2pdu.h @@ -1216,6 +1216,8 @@ struct create_context { ); __u8 Buffer[]; } __packed; +static_assert(offsetof(struct create_context, Buffer) == sizeof(struct create_context_hdr), + "struct member likely outside of __struct_group()"); struct smb2_create_req { struct smb2_hdr hdr; diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 09e1e7771592f522e44e309505078e04b6853cad..7889df8112b4eecf6b0ee91d2c04ec84e3a54326 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -165,11 +165,43 @@ void ksmbd_all_conn_set_status(u64 sess_id, u32 status) up_read(&conn_list_lock); } -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id) +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn) { wait_event(conn->req_running_q, atomic_read(&conn->req_running) < 2); } +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id) +{ + struct ksmbd_conn *conn; + int rc, retry_count = 0, max_timeout = 120; + int rcount = 1; + +retry_idle: + if (retry_count >= max_timeout) + return -EIO; + + down_read(&conn_list_lock); + list_for_each_entry(conn, &conn_list, conns_list) { + if (conn->binding || xa_load(&conn->sessions, sess_id)) { + if (conn == curr_conn) + rcount = 2; + if (atomic_read(&conn->req_running) >= rcount) { + rc = wait_event_timeout(conn->req_running_q, + atomic_read(&conn->req_running) < rcount, + HZ); + if (!rc) { + up_read(&conn_list_lock); + retry_count++; + goto retry_idle; + } + } + } + } + up_read(&conn_list_lock); + + return 0; +} + int ksmbd_conn_write(struct ksmbd_work *work) { struct ksmbd_conn *conn = work->conn; diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 5c2845e47cf2df5f33bba86bfb3e88ae1917635f..5b947175c048eb77597dfab486f60dd183d0d329 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -145,7 +145,8 @@ extern struct list_head conn_list; extern struct rw_semaphore conn_list_lock; bool ksmbd_conn_alive(struct ksmbd_conn *conn); -void ksmbd_conn_wait_idle(struct ksmbd_conn *conn, u64 sess_id); +void ksmbd_conn_wait_idle(struct ksmbd_conn *conn); +int ksmbd_conn_wait_idle_sess_id(struct ksmbd_conn *curr_conn, u64 sess_id); struct ksmbd_conn *ksmbd_conn_alloc(void); void ksmbd_conn_free(struct ksmbd_conn *conn); bool ksmbd_conn_lookup_dialect(struct ksmbd_conn *c); diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index e0a6b758094fc57c5fd47d0be2ab9f4d1e94ee70..d8d03070ae44b46dab1505227f617218d09562e2 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -15,6 +15,7 @@ #include "share_config.h" #include "user_config.h" #include "user_session.h" +#include "../connection.h" #include "../transport_ipc.h" #include "../misc.h" @@ -120,12 +121,13 @@ static int parse_veto_list(struct ksmbd_share_config *share, return 0; } -static struct ksmbd_share_config *share_config_request(struct unicode_map *um, +static struct ksmbd_share_config *share_config_request(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config_response *resp; struct ksmbd_share_config *share = NULL; struct ksmbd_share_config *lookup; + struct unicode_map *um = work->conn->um; int ret; resp = ksmbd_ipc_share_config_request(name); @@ -181,7 +183,14 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, KSMBD_SHARE_CONFIG_VETO_LIST(resp), resp->veto_list_sz); if (!ret && share->path) { + if (__ksmbd_override_fsids(work, share)) { + kill_share(share); + share = NULL; + goto out; + } + ret = kern_path(share->path, 0, &share->vfs_path); + ksmbd_revert_fsids(work); if (ret) { ksmbd_debug(SMB, "failed to access '%s'\n", share->path); @@ -214,7 +223,7 @@ out: return share; } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name) { struct ksmbd_share_config *share; @@ -227,7 +236,7 @@ struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, if (share) return share; - return share_config_request(um, name); + return share_config_request(work, name); } bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, diff --git a/fs/smb/server/mgmt/share_config.h b/fs/smb/server/mgmt/share_config.h index 5f591751b92365eac17d7e391ccdd57bc0a2e9b6..d4ac2dd4de204053e62634a2149fc588b9fad77f 100644 --- a/fs/smb/server/mgmt/share_config.h +++ b/fs/smb/server/mgmt/share_config.h @@ -11,6 +11,8 @@ #include #include +struct ksmbd_work; + struct ksmbd_share_config { char *name; char *path; @@ -68,7 +70,7 @@ static inline void ksmbd_share_config_put(struct ksmbd_share_config *share) __ksmbd_share_config_put(share); } -struct ksmbd_share_config *ksmbd_share_config_get(struct unicode_map *um, +struct ksmbd_share_config *ksmbd_share_config_get(struct ksmbd_work *work, const char *name); bool ksmbd_share_veto_filename(struct ksmbd_share_config *share, const char *filename); diff --git a/fs/smb/server/mgmt/tree_connect.c b/fs/smb/server/mgmt/tree_connect.c index d2c81a8a11dda10ef710bc97e1c059073fddb700..94a52a75014a4384fb80fbc461f3f558341c4f55 100644 --- a/fs/smb/server/mgmt/tree_connect.c +++ b/fs/smb/server/mgmt/tree_connect.c @@ -16,17 +16,18 @@ #include "user_session.h" struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name) +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name) { struct ksmbd_tree_conn_status status = {-ENOENT, NULL}; struct ksmbd_tree_connect_response *resp = NULL; struct ksmbd_share_config *sc; struct ksmbd_tree_connect *tree_conn = NULL; struct sockaddr *peer_addr; + struct ksmbd_conn *conn = work->conn; + struct ksmbd_session *sess = work->sess; int ret; - sc = ksmbd_share_config_get(conn->um, share_name); + sc = ksmbd_share_config_get(work, share_name); if (!sc) return status; @@ -61,7 +62,7 @@ ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, struct ksmbd_share_config *new_sc; ksmbd_share_config_del(sc); - new_sc = ksmbd_share_config_get(conn->um, share_name); + new_sc = ksmbd_share_config_get(work, share_name); if (!new_sc) { pr_err("Failed to update stale share config\n"); status.ret = -ESTALE; diff --git a/fs/smb/server/mgmt/tree_connect.h b/fs/smb/server/mgmt/tree_connect.h index 6377a70b811c8993575d96889a8c2ef5f2fada2c..a42cdd0510411437fe074ee3359a648496a36b13 100644 --- a/fs/smb/server/mgmt/tree_connect.h +++ b/fs/smb/server/mgmt/tree_connect.h @@ -13,6 +13,7 @@ struct ksmbd_share_config; struct ksmbd_user; struct ksmbd_conn; +struct ksmbd_work; enum { TREE_NEW = 0, @@ -50,8 +51,7 @@ static inline int test_tree_conn_flag(struct ksmbd_tree_connect *tree_conn, struct ksmbd_session; struct ksmbd_tree_conn_status -ksmbd_tree_conn_connect(struct ksmbd_conn *conn, struct ksmbd_session *sess, - const char *share_name); +ksmbd_tree_conn_connect(struct ksmbd_work *work, const char *share_name); void ksmbd_tree_connect_put(struct ksmbd_tree_connect *tcon); int ksmbd_tree_conn_disconnect(struct ksmbd_session *sess, diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c index 162a12685d2c956222eae5438c8887afe3711c20..99416ce9f50183dec9bb1da9fc495b0255396c0c 100644 --- a/fs/smb/server/mgmt/user_session.c +++ b/fs/smb/server/mgmt/user_session.c @@ -311,6 +311,7 @@ void destroy_previous_session(struct ksmbd_conn *conn, { struct ksmbd_session *prev_sess; struct ksmbd_user *prev_user; + int err; down_write(&sessions_table_lock); down_write(&conn->session_lock); @@ -325,8 +326,16 @@ void destroy_previous_session(struct ksmbd_conn *conn, memcmp(user->passkey, prev_user->passkey, user->passkey_sz)) goto out; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_RECONNECT); + err = ksmbd_conn_wait_idle_sess_id(conn, id); + if (err) { + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); + goto out; + } + ksmbd_destroy_file_table(&prev_sess->file_table); prev_sess->state = SMB2_SESSION_EXPIRED; + ksmbd_all_conn_set_status(id, KSMBD_SESS_NEED_NEGOTIATE); ksmbd_launch_ksmbd_durable_scavenger(); out: up_write(&conn->session_lock); diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index a8f52c4ebbdadd2c966b5a247058c3205be6e0f3..e546ffa57b55ab8ae646731f5a76621a7bb29768 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1510,7 +1510,7 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) * parse_lease_state() - parse lease context containted in file open request * @open_req: buffer containing smb2 file open(create) request * - * Return: oplock state, -ENOENT if create lease context not found + * Return: allocated lease context object on success, otherwise NULL */ struct lease_ctx_info *parse_lease_state(void *open_req) { diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index 37a39ab4ee6543fbec2adb6dcb5fdfef93492e8d..20846a4d3031fbb351dd71c3695b4f916f308033 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -519,7 +519,7 @@ int init_smb2_rsp_hdr(struct ksmbd_work *work) * smb2_allocate_rsp_buf() - allocate smb2 response buffer * @work: smb work containing smb request buffer * - * Return: 0 on success, otherwise -ENOMEM + * Return: 0 on success, otherwise error */ int smb2_allocate_rsp_buf(struct ksmbd_work *work) { @@ -1370,7 +1370,8 @@ static int ntlm_negotiate(struct ksmbd_work *work, } sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); out: @@ -1453,7 +1454,9 @@ static int ntlm_authenticate(struct ksmbd_work *work, return -ENOMEM; sz = le16_to_cpu(rsp->SecurityBufferOffset); - memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, spnego_blob_len); + unsafe_memcpy((char *)&rsp->hdr.ProtocolId + sz, spnego_blob, + spnego_blob_len, + /* alloc is larger than blob, see smb2_allocate_rsp_buf() */); rsp->SecurityBufferLength = cpu_to_le16(spnego_blob_len); kfree(spnego_blob); } @@ -1955,7 +1958,7 @@ int smb2_tree_connect(struct ksmbd_work *work) ksmbd_debug(SMB, "tree connect request for tree %s treename %s\n", name, treename); - status = ksmbd_tree_conn_connect(conn, sess, name); + status = ksmbd_tree_conn_connect(work, name); if (status.ret == KSMBD_TREE_CONN_STATUS_OK) rsp->hdr.Id.SyncId.TreeId = cpu_to_le32(status.tree_conn->id); else @@ -2210,7 +2213,7 @@ int smb2_session_logoff(struct ksmbd_work *work) ksmbd_conn_unlock(conn); ksmbd_close_session_fds(work); - ksmbd_conn_wait_idle(conn, sess_id); + ksmbd_conn_wait_idle(conn); /* * Re-lookup session to validate if session is deleted @@ -2767,8 +2770,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, } } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { dh_info->CreateGuid = durable_v2_blob->CreateGuid; dh_info->persistent = @@ -2788,8 +2791,8 @@ static int parse_durable_handle_context(struct ksmbd_work *work, goto out; } - if (((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || - req_op_level == SMB2_OPLOCK_LEVEL_BATCH)) { + if ((lc && (lc->req_state & SMB2_LEASE_HANDLE_CACHING_LE)) || + req_op_level == SMB2_OPLOCK_LEVEL_BATCH) { ksmbd_debug(SMB, "Request for durable open\n"); dh_info->type = dh_idx; } @@ -3093,7 +3096,6 @@ int smb2_open(struct ksmbd_work *work) goto err_out; } - file_present = true; idmap = mnt_idmap(path.mnt); } else { if (rc != -ENOENT) @@ -3411,7 +3413,7 @@ int smb2_open(struct ksmbd_work *work) goto err_out1; } } else { - if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE) { + if (req_op_level == SMB2_OPLOCK_LEVEL_LEASE && lc) { if (S_ISDIR(file_inode(filp)->i_mode)) { lc->req_state &= ~SMB2_LEASE_WRITE_CACHING_LE; lc->is_dir = true; @@ -3710,7 +3712,7 @@ err_out2: kfree(name); kfree(lc); - return 0; + return rc; } static int readdir_info_level_struct_sz(int info_level) @@ -4406,7 +4408,8 @@ int smb2_query_dir(struct ksmbd_work *work) rsp->OutputBufferLength = cpu_to_le32(0); rsp->Buffer[0] = 0; rc = ksmbd_iov_pin_rsp(work, (void *)rsp, - sizeof(struct smb2_query_directory_rsp)); + offsetof(struct smb2_query_directory_rsp, Buffer) + + 1); if (rc) goto err_out; } else { @@ -5357,7 +5360,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, "NTFS", PATH_MAX, conn->local_nls, 0); len = len * 2; info->FileSystemNameLen = cpu_to_le32(len); - sz = sizeof(struct filesystem_attribute_info) - 2 + len; + sz = sizeof(struct filesystem_attribute_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5383,7 +5386,7 @@ static int smb2_get_info_filesystem(struct ksmbd_work *work, len = len * 2; info->VolumeLabelSize = cpu_to_le32(len); info->Reserved = 0; - sz = sizeof(struct filesystem_vol_info) - 2 + len; + sz = sizeof(struct filesystem_vol_info) + len; rsp->OutputBufferLength = cpu_to_le32(sz); break; } @@ -5596,6 +5599,11 @@ int smb2_query_info(struct ksmbd_work *work) ksmbd_debug(SMB, "GOT query info request\n"); + if (ksmbd_override_fsids(work)) { + rc = -ENOMEM; + goto err_out; + } + switch (req->InfoType) { case SMB2_O_INFO_FILE: ksmbd_debug(SMB, "GOT SMB2_O_INFO_FILE\n"); @@ -5614,6 +5622,7 @@ int smb2_query_info(struct ksmbd_work *work) req->InfoType); rc = -EOPNOTSUPP; } + ksmbd_revert_fsids(work); if (!rc) { rsp->StructureSize = cpu_to_le16(9); @@ -5623,6 +5632,7 @@ int smb2_query_info(struct ksmbd_work *work) le32_to_cpu(rsp->OutputBufferLength)); } +err_out: if (rc < 0) { if (rc == -EACCES) rsp->hdr.Status = STATUS_ACCESS_DENIED; diff --git a/fs/smb/server/smb_common.c b/fs/smb/server/smb_common.c index 474dadf6b7b8bc51b3bfa94c6d29f02621f21cd5..13818ecb6e1b2fb24d8424d0bc70083b810adfe0 100644 --- a/fs/smb/server/smb_common.c +++ b/fs/smb/server/smb_common.c @@ -732,10 +732,10 @@ bool is_asterisk(char *p) return p && p[0] == '*'; } -int ksmbd_override_fsids(struct ksmbd_work *work) +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share) { struct ksmbd_session *sess = work->sess; - struct ksmbd_share_config *share = work->tcon->share_conf; struct cred *cred; struct group_info *gi; unsigned int uid; @@ -775,6 +775,11 @@ int ksmbd_override_fsids(struct ksmbd_work *work) return 0; } +int ksmbd_override_fsids(struct ksmbd_work *work) +{ + return __ksmbd_override_fsids(work, work->tcon->share_conf); +} + void ksmbd_revert_fsids(struct ksmbd_work *work) { const struct cred *cred; diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h index f1092519c0c288223b1ad7d2e66e9545fc5bcac3..cc1d6dfe29d56534e86ae24128dcc2464506e6e1 100644 --- a/fs/smb/server/smb_common.h +++ b/fs/smb/server/smb_common.h @@ -213,7 +213,7 @@ struct filesystem_attribute_info { __le32 Attributes; __le32 MaxPathNameComponentLength; __le32 FileSystemNameLen; - __le16 FileSystemName[1]; /* do not have to save this - get subset? */ + __le16 FileSystemName[]; /* do not have to save this - get subset? */ } __packed; struct filesystem_device_info { @@ -226,7 +226,7 @@ struct filesystem_vol_info { __le32 SerialNumber; __le32 VolumeLabelSize; __le16 Reserved; - __le16 VolumeLabel[1]; + __le16 VolumeLabel[]; } __packed; struct filesystem_info { @@ -447,6 +447,8 @@ int ksmbd_extract_shortname(struct ksmbd_conn *conn, int ksmbd_smb_negotiate_common(struct ksmbd_work *work, unsigned int command); int ksmbd_smb_check_shared_mode(struct file *filp, struct ksmbd_file *curr_fp); +int __ksmbd_override_fsids(struct ksmbd_work *work, + struct ksmbd_share_config *share); int ksmbd_override_fsids(struct ksmbd_work *work); void ksmbd_revert_fsids(struct ksmbd_work *work); diff --git a/fs/squashfs/inode.c b/fs/squashfs/inode.c index 16bd693d0b3aa23ce87af9cc1540e113a4c2a286..d5918eba27e3711f62d7b4b8a1d7972f22ac6ca5 100644 --- a/fs/squashfs/inode.c +++ b/fs/squashfs/inode.c @@ -279,8 +279,13 @@ int squashfs_read_inode(struct inode *inode, long long ino) if (err < 0) goto failed_read; - set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_size = le32_to_cpu(sqsh_ino->symlink_size); + if (inode->i_size > PAGE_SIZE) { + ERROR("Corrupted symlink\n"); + return -EINVAL; + } + + set_nlink(inode, le32_to_cpu(sqsh_ino->nlink)); inode->i_op = &squashfs_symlink_inode_ops; inode_nohighmem(inode); inode->i_data.a_ops = &squashfs_symlink_aops; diff --git a/fs/super.c b/fs/super.c index 38d72a3cf6fcf88560f13faae8c2452de1fbad8c..b7913b55debc1fc489cae188d438c62d4da87504 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1802,8 +1802,8 @@ int vfs_get_tree(struct fs_context *fc) return error; if (!fc->root) { - pr_err("Filesystem %s get_tree() didn't set fc->root\n", - fc->fs_type->name); + pr_err("Filesystem %s get_tree() didn't set fc->root, returned %i\n", + fc->fs_type->name, error); /* We don't know what the locking state of the superblock is - * if there is a superblock. */ diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 5d88c184f0fc15321993577eca50325d78db0b68..01e99e98457ddbf35f04f9702ab0f8d15f81f35f 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -112,7 +112,7 @@ static void release_ei(struct kref *ref) entry->release(entry->name, ei->data); } - call_rcu(&ei->rcu, free_ei_rcu); + call_srcu(&eventfs_srcu, &ei->rcu, free_ei_rcu); } static inline void put_ei(struct eventfs_inode *ei) @@ -736,7 +736,7 @@ struct eventfs_inode *eventfs_create_dir(const char *name, struct eventfs_inode /* Was the parent freed? */ if (list_empty(&ei->list)) { cleanup_ei(ei); - ei = NULL; + ei = ERR_PTR(-EBUSY); } return ei; } diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 1028ab6d9a744b7dde0195b9c5e090b6e8ea2adc..1748dff58c3bc95e1b147d3501fad023f5462f9b 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -42,7 +42,7 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) struct tracefs_inode *ti; unsigned long flags; - ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); + ti = alloc_inode_sb(sb, tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; @@ -53,15 +53,14 @@ static struct inode *tracefs_alloc_inode(struct super_block *sb) return &ti->vfs_inode; } -static void tracefs_free_inode_rcu(struct rcu_head *rcu) +static void tracefs_free_inode(struct inode *inode) { - struct tracefs_inode *ti; + struct tracefs_inode *ti = get_tracefs(inode); - ti = container_of(rcu, struct tracefs_inode, rcu); kmem_cache_free(tracefs_inode_cachep, ti); } -static void tracefs_free_inode(struct inode *inode) +static void tracefs_destroy_inode(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); unsigned long flags; @@ -69,8 +68,6 @@ static void tracefs_free_inode(struct inode *inode) spin_lock_irqsave(&tracefs_inode_lock, flags); list_del_rcu(&ti->list); spin_unlock_irqrestore(&tracefs_inode_lock, flags); - - call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -437,6 +434,7 @@ static int tracefs_drop_inode(struct inode *inode) static const struct super_operations tracefs_super_operations = { .alloc_inode = tracefs_alloc_inode, .free_inode = tracefs_free_inode, + .destroy_inode = tracefs_destroy_inode, .drop_inode = tracefs_drop_inode, .statfs = simple_statfs, .show_options = tracefs_show_options, diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index f704d8348357ed8f8a2a11195718b716e7277244..d83c2a25f288e0fbdc2e47de9b711cddf17746c6 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -10,10 +10,7 @@ enum { }; struct tracefs_inode { - union { - struct inode vfs_inode; - struct rcu_head rcu; - }; + struct inode vfs_inode; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ struct list_head list; unsigned long flags; diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index cb035da3f990b0d9db135678c7a09d70040cde58..fb05f44f6c754ac214145d8690a7e42ebc549ece 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -56,7 +56,7 @@ typedef uint8_t xfs_dqtype_t; * And, of course, we also need to take into account the dquot log format item * used to describe each dquot. */ -#define XFS_DQUOT_LOGRES(mp) \ +#define XFS_DQUOT_LOGRES \ ((sizeof(struct xfs_dq_logformat) + sizeof(struct xfs_disk_dquot)) * 6) #define XFS_IS_QUOTA_ON(mp) ((mp)->m_qflags & XFS_ALL_QUOTA_ACCT) diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c index 3dc8f785bf2947dec439cd5c9009772ab3ebef24..45aaf169806aa5ec4b23cf17d7d41406d8be4a1c 100644 --- a/fs/xfs/libxfs/xfs_trans_resv.c +++ b/fs/xfs/libxfs/xfs_trans_resv.c @@ -338,11 +338,11 @@ xfs_calc_write_reservation( blksz); t1 += adj; t3 += adj; - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 1); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -410,11 +410,11 @@ xfs_calc_itruncate_reservation( xfs_refcountbt_block_count(mp, 4), blksz); - return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3); + return XFS_DQUOT_LOGRES + max3(t1, t2, t3); } t4 = xfs_calc_refcountbt_reservation(mp, 2); - return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3)); + return XFS_DQUOT_LOGRES + max(t4, max3(t1, t2, t3)); } unsigned int @@ -466,7 +466,7 @@ STATIC uint xfs_calc_rename_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -577,7 +577,7 @@ STATIC uint xfs_calc_link_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -641,7 +641,7 @@ STATIC uint xfs_calc_remove_reservation( struct xfs_mount *mp) { - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; struct xfs_trans_resv *resp = M_RES(mp); unsigned int t1, t2, t3 = 0; @@ -729,7 +729,7 @@ xfs_calc_icreate_reservation( struct xfs_mount *mp) { struct xfs_trans_resv *resp = M_RES(mp); - unsigned int overhead = XFS_DQUOT_LOGRES(mp); + unsigned int overhead = XFS_DQUOT_LOGRES; unsigned int t1, t2, t3 = 0; t1 = xfs_calc_icreate_resv_alloc(mp); @@ -747,7 +747,7 @@ STATIC uint xfs_calc_create_tmpfile_reservation( struct xfs_mount *mp) { - uint res = XFS_DQUOT_LOGRES(mp); + uint res = XFS_DQUOT_LOGRES; res += xfs_calc_icreate_resv_alloc(mp); return res + xfs_calc_iunlink_add_reservation(mp); @@ -829,7 +829,7 @@ STATIC uint xfs_calc_ifree_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + xfs_calc_iunlink_remove_reservation(mp) + @@ -846,7 +846,7 @@ STATIC uint xfs_calc_ichange_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); @@ -955,7 +955,7 @@ STATIC uint xfs_calc_addafork_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(1, mp->m_dir_geo->blksize) + @@ -1003,7 +1003,7 @@ STATIC uint xfs_calc_attrsetm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); @@ -1043,7 +1043,7 @@ STATIC uint xfs_calc_attrrm_reservation( struct xfs_mount *mp) { - return XFS_DQUOT_LOGRES(mp) + + return XFS_DQUOT_LOGRES + max((xfs_calc_inode_res(mp, 1) + xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)) + diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 0dbc484b182f9813adc26e3efcf3cd1d6a73b652..2f98d90d7fd66d099c9f8f78725e77700c540a78 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -696,7 +696,7 @@ xrep_agfl_init_header( * step. */ xagb_bitmap_init(&af.used_extents); - af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp), + af.agfl_bno = xfs_buf_to_agfl_bno(agfl_bp); xagb_bitmap_walk(agfl_extents, xrep_agfl_fill, &af); error = xagb_bitmap_disunion(agfl_extents, &af.used_extents); if (error) diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index 24a15bf784f11b06ab0f46dae93d84099b14e1c8..5ab2ac53c920020816b39d93562fa819454a72b2 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -938,7 +938,13 @@ xchk_bmap( } break; case XFS_ATTR_FORK: - if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) + /* + * "attr" means that an attr fork was created at some point in + * the life of this filesystem. "attr2" means that inodes have + * variable-sized data/attr fork areas. Hence we only check + * attr here. + */ + if (!xfs_has_attr(mp)) xchk_ino_set_corrupt(sc, sc->ip->i_ino); break; default: diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 733c410a22797a91c3d2395677332667dc9496e3..91e7b51ce0680bc891206255afd4fe20f5f78739 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -799,7 +799,7 @@ xchk_parent_pptr( } if (pp->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) - goto out_pp; + goto out_names; /* * Complain if the number of parent pointers doesn't match the link diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 92ef4cdc486e901ab786bc538faf061c350270d5..c886d5d0eb021a7683a3f84d2819be052697c305 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -959,18 +959,16 @@ TRACE_EVENT(xfile_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; __entry->ino = file_inode(xf->file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(xf->file, pathname, sizeof(pathname) - 1); + path = file_path(xf->file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("xfino 0x%lx path '%s'", __entry->ino, diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index 5c947e5ce8b8811d659c9b964f0d2e74fc2bf7c1..7db38630487514d553facdc3421e37349c561e3d 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -139,7 +139,7 @@ xfs_attr_shortform_list( sbp->name = sfe->nameval; sbp->namelen = sfe->namelen; /* These are bytes, and both on-disk, don't endian-flip */ - sbp->value = &sfe->nameval[sfe->namelen], + sbp->value = &sfe->nameval[sfe->namelen]; sbp->valuelen = sfe->valuelen; sbp->flags = sfe->flags; sbp->hash = xfs_attr_hashval(dp->i_mount, sfe->flags, diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 4e933db75b12b94ad4b76113ec226fefbe819f1c..6b13666d4e96355aba7de80b5a1718df54dcd518 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -483,6 +483,17 @@ xfs_ioctl_setattr_xflags( /* Can't change realtime flag if any extents are allocated. */ if (ip->i_df.if_nextents || ip->i_delayed_blks) return -EINVAL; + + /* + * If S_DAX is enabled on this file, we can only switch the + * device if both support fsdax. We can't update S_DAX because + * there might be other threads walking down the access paths. + */ + if (IS_DAX(VFS_I(ip)) && + (mp->m_ddev_targp->bt_daxdev == NULL || + (mp->m_rtdev_targp && + mp->m_rtdev_targp->bt_daxdev == NULL))) + return -EINVAL; } if (rtflag) { diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 5646d300b2861e1f6de24594d17dabda371a80b2..180ce697305a92ea829f7e55ac5af55ce7ed8709 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -4715,20 +4715,18 @@ TRACE_EVENT(xmbuf_create, TP_STRUCT__entry( __field(dev_t, dev) __field(unsigned long, ino) - __array(char, pathname, 256) + __array(char, pathname, MAXNAMELEN) ), TP_fast_assign( - char pathname[257]; char *path; struct file *file = btp->bt_file; __entry->dev = btp->bt_mount->m_super->s_dev; __entry->ino = file_inode(file)->i_ino; - memset(pathname, 0, sizeof(pathname)); - path = file_path(file, pathname, sizeof(pathname) - 1); + path = file_path(file, __entry->pathname, MAXNAMELEN); if (IS_ERR(path)) - path = "(unknown)"; - strncpy(__entry->pathname, path, sizeof(__entry->pathname)); + strncpy(__entry->pathname, "(unknown)", + sizeof(__entry->pathname)); ), TP_printk("dev %d:%d xmino 0x%lx path '%s'", MAJOR(__entry->dev), MINOR(__entry->dev), diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 0fafcc9f3dbe4447525bb0fc964d27089a791331..8ede9d099d1fea6b6399b5a0617246baee057c07 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -644,7 +644,12 @@ xfsaild( set_freezable(); while (1) { - if (tout) + /* + * Long waits of 50ms or more occur when we've run out of items + * to push, so we only want uninterruptible state if we're + * actually blocked on something. + */ + if (tout && tout <= 20) set_current_state(TASK_KILLABLE|TASK_FREEZABLE); else set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index ab3d22f662f280cd08bcc463599c2286d2b351ba..eaf849260bd672fc1ce02040902b4fff9c5676ce 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -110,7 +110,24 @@ xfs_attr_change( args->whichfork = XFS_ATTR_FORK; xfs_attr_sethash(args); - return xfs_attr_set(args, op, args->attr_filter & XFS_ATTR_ROOT); + /* + * Some xattrs must be resistant to allocation failure at ENOSPC, e.g. + * creating an inode with ACLs or security attributes requires the + * allocation of the xattr holding that information to succeed. Hence + * we allow xattrs in the VFS TRUSTED, SYSTEM, POSIX_ACL and SECURITY + * (LSM xattr) namespaces to dip into the reserve block pool to allow + * manipulation of these xattrs when at ENOSPC. These VFS xattr + * namespaces translate to the XFS_ATTR_ROOT and XFS_ATTR_SECURE on-disk + * namespaces. + * + * For most of these cases, these special xattrs will fit in the inode + * itself and so consume no extra space or only require temporary extra + * space while an overwrite is being made. Hence the use of the reserved + * pool is largely to avoid the worst case reservation from preventing + * the xattr from being created at ENOSPC. + */ + return xfs_attr_set(args, op, + args->attr_filter & (XFS_ATTR_ROOT | XFS_ATTR_SECURE)); } diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index 80dc36f9d5274c952d350e5c93fe0acf14d9e5e8..9f1c1d225e32c62a7f728a2711d5ff9045e86bcd 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -660,12 +660,9 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status void *context)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_execute_reg_methods(acpi_handle device, + u32 nax_depth, acpi_adr_space_type space_id)) -ACPI_EXTERNAL_RETURN_STATUS(acpi_status - acpi_execute_orphan_reg_method(acpi_handle device, - acpi_adr_space_type - space_id)) ACPI_EXTERNAL_RETURN_STATUS(acpi_status acpi_remove_address_space_handler(acpi_handle device, diff --git a/include/acpi/video.h b/include/acpi/video.h index 3d538d4178abb1a324adb8def3d5132abd71925c..044c463138df8148dceb37b8cd651f580f82c011 100644 --- a/include/acpi/video.h +++ b/include/acpi/video.h @@ -50,6 +50,7 @@ enum acpi_backlight_type { acpi_backlight_native, acpi_backlight_nvidia_wmi_ec, acpi_backlight_apple_gmux, + acpi_backlight_dell_uart, }; #if IS_ENABLED(CONFIG_ACPI_VIDEO) diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index ad6afc5c49183a8043536fe7129901d3fa9c5717..1ae44793132a8d9a6349aff7dd51299f50cf820e 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -911,13 +911,12 @@ #define CON_INITCALL \ BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end) -#define RUNTIME_NAME(t,x) runtime_##t##_##x +#define NAMED_SECTION(name) \ + . = ALIGN(8); \ + name : AT(ADDR(name) - LOAD_OFFSET) \ + { BOUNDED_SECTION_PRE_LABEL(name, name, __start_, __stop_) } -#define RUNTIME_CONST(t,x) \ - . = ALIGN(8); \ - RUNTIME_NAME(t,x) : AT(ADDR(RUNTIME_NAME(t,x)) - LOAD_OFFSET) { \ - *(RUNTIME_NAME(t,x)); \ - } +#define RUNTIME_CONST(t,x) NAMED_SECTION(runtime_##t##_##x) /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */ #define KUNIT_TABLE() \ diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce554831bb3fa43b79a8a17aec91562..9689a7c5dd36b25d7ddefa0ec9fd4de210349829 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 8c4768c44a01b3221b3da74970ba3c445eaa7fbd..d3b66d77df7a3c029e6e2c8b50abcf531eb88dfb 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -270,6 +270,18 @@ static inline void bitmap_copy_clear_tail(unsigned long *dst, dst[nbits / BITS_PER_LONG] &= BITMAP_LAST_WORD_MASK(nbits); } +static inline void bitmap_copy_and_extend(unsigned long *to, + const unsigned long *from, + unsigned int count, unsigned int size) +{ + unsigned int copy = BITS_TO_LONGS(count); + + memcpy(to, from, copy * sizeof(long)); + if (count % BITS_PER_LONG) + to[copy - 1] &= BITMAP_LAST_WORD_MASK(count); + memset(to + copy, 0, bitmap_size(size) - copy * sizeof(long)); +} + /* * On 32-bit systems bitmaps are represented as u32 arrays internally. On LE64 * machines the order of hi and lo parts of numbers match the bitmap structure. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e85ec73a07d575e13caf1f5f3639fd957b54200b..b7664d593486a8b56597fea32aa2c7ad4aeb2bbb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1296,12 +1296,7 @@ bdev_max_secure_erase_sectors(struct block_device *bdev) static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev) { - struct request_queue *q = bdev_get_queue(bdev); - - if (q) - return q->limits.max_write_zeroes_sectors; - - return 0; + return bdev_get_queue(bdev)->limits.max_write_zeroes_sectors; } static inline bool bdev_nonrot(struct block_device *bdev) diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 6503c85b10a30a48ec3190a7b1029a0e20ee1c09..7b776dae36e58440a340c02372cf6eae4941e89f 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -856,8 +856,8 @@ static inline u32 type_flag(u32 type) /* only use after check_attach_btf_id() */ static inline enum bpf_prog_type resolve_prog_type(const struct bpf_prog *prog) { - return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->dst_prog) ? - prog->aux->dst_prog->type : prog->type; + return (prog->type == BPF_PROG_TYPE_EXT && prog->aux->saved_dst_prog_type) ? + prog->aux->saved_dst_prog_type : prog->type; } static inline bool bpf_prog_check_recur(const struct bpf_prog *prog) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2594553bb30b10d14dda5f634398b8596e6bbb7e..2df665fa2964d36b96cf4d11ca7029870c031ca2 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -296,6 +296,15 @@ static inline void *offset_to_ptr(const int *off) #define is_signed_type(type) (((type)(-1)) < (__force type)1) #define is_unsigned_type(type) (!is_signed_type(type)) +/* + * Useful shorthand for "is this condition known at compile-time?" + * + * Note that the condition may involve non-constant values, + * but the compiler may know enough about the details of the + * values to determine that the condition is statically true. + */ +#define statically_true(x) (__builtin_constant_p(x) && (x)) + /* * This is needed in functions which generate the stack canary, see * arch/x86/kernel/smpboot.c::start_secondary() for an example. diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 51ba681b915a979e5475eecf3fc89c71623b3af6..9316c39260e0e43d92ef0cbbf9f44dc9c9d18bee 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -100,7 +100,6 @@ enum cpuhp_state { CPUHP_WORKQUEUE_PREP, CPUHP_POWER_NUMA_PREPARE, CPUHP_HRTIMERS_PREPARE, - CPUHP_PROFILE_PREPARE, CPUHP_X2APIC_PREPARE, CPUHP_SMPCFD_PREPARE, CPUHP_RELAY_PREPARE, @@ -148,6 +147,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_LOONGARCH_STARTING, CPUHP_AP_IRQ_SIFIVE_PLIC_STARTING, CPUHP_AP_IRQ_RISCV_IMSIC_STARTING, + CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 801a7e524113e74e2107b42b3c053107b6076bfa..53158de44b837854d3b2d047beebaf632f390b54 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1037,7 +1037,7 @@ void init_cpu_online(const struct cpumask *src); assign_bit(cpumask_check(cpu), cpumask_bits(mask), (val)) #define set_cpu_possible(cpu, possible) assign_cpu((cpu), &__cpu_possible_mask, (possible)) -#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_possible_mask, (enabled)) +#define set_cpu_enabled(cpu, enabled) assign_cpu((cpu), &__cpu_enabled_mask, (enabled)) #define set_cpu_present(cpu, present) assign_cpu((cpu), &__cpu_present_mask, (present)) #define set_cpu_active(cpu, active) assign_cpu((cpu), &__cpu_active_mask, (active)) #define set_cpu_dying(cpu, dying) assign_cpu((cpu), &__cpu_dying_mask, (dying)) diff --git a/include/linux/dsa/ocelot.h b/include/linux/dsa/ocelot.h index dca2969015d802d1fff972482bd4ea4eac3f535f..6fbfbde68a37c3f64e1e808d296d6083ff45c23f 100644 --- a/include/linux/dsa/ocelot.h +++ b/include/linux/dsa/ocelot.h @@ -5,6 +5,8 @@ #ifndef _NET_DSA_TAG_OCELOT_H #define _NET_DSA_TAG_OCELOT_H +#include +#include #include #include #include @@ -273,4 +275,49 @@ static inline u32 ocelot_ptp_rew_op(struct sk_buff *skb) return rew_op; } +/** + * ocelot_xmit_get_vlan_info: Determine VLAN_TCI and TAG_TYPE for injected frame + * @skb: Pointer to socket buffer + * @br: Pointer to bridge device that the port is under, if any + * @vlan_tci: + * @tag_type: + * + * If the port is under a VLAN-aware bridge, remove the VLAN header from the + * payload and move it into the DSA tag, which will make the switch classify + * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, + * which is the pvid of standalone ports (OCELOT_STANDALONE_PVID), although not + * of VLAN-unaware bridge ports (that would be ocelot_vlan_unaware_pvid()). + * Anyway, VID 0 is fine because it is stripped on egress for these port modes, + * and source address learning is not performed for packets injected from the + * CPU anyway, so it doesn't matter that the VID is "wrong". + */ +static inline void ocelot_xmit_get_vlan_info(struct sk_buff *skb, + struct net_device *br, + u64 *vlan_tci, u64 *tag_type) +{ + struct vlan_ethhdr *hdr; + u16 proto, tci; + + if (!br || !br_vlan_enabled(br)) { + *vlan_tci = 0; + *tag_type = IFH_TAG_TYPE_C; + return; + } + + hdr = (struct vlan_ethhdr *)skb_mac_header(skb); + br_vlan_get_proto(br, &proto); + + if (ntohs(hdr->h_vlan_proto) == proto) { + vlan_remove_tag(skb, &tci); + *vlan_tci = tci; + } else { + rcu_read_lock(); + br_vlan_get_pvid_rcu(br, &tci); + rcu_read_unlock(); + *vlan_tci = tci; + } + + *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; +} + #endif diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 303fda54ef17915e9c682acf01e4b80498ed7838..989c94eddb2b4ffd45c32aa0b440a6a2016f84b2 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -736,10 +736,10 @@ struct kernel_ethtool_ts_info { * @rxfh_key_space: same as @rxfh_indir_space, but for the key. * @rxfh_priv_size: size of the driver private data area the core should * allocate for an RSS context (in &struct ethtool_rxfh_context). - * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this - * is zero then the core may choose any (nonzero) ID, otherwise the core - * will only use IDs strictly less than this value, as the @rss_context - * argument to @create_rxfh_context and friends. + * @rxfh_max_num_contexts: maximum (exclusive) supported RSS context ID. + * If this is zero then the core may choose any (nonzero) ID, otherwise + * the core will only use IDs strictly less than this value, as the + * @rss_context argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -954,7 +954,7 @@ struct ethtool_ops { u32 rxfh_indir_space; u16 rxfh_key_space; u16 rxfh_priv_size; - u32 rxfh_max_context_id; + u32 rxfh_max_num_contexts; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/include/linux/file.h b/include/linux/file.h index 237931f20739f3840fecdfcce122a5984b81e8cd..59b146a14dcad02ca14091189c228bcbab229437 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -110,7 +110,7 @@ DEFINE_CLASS(get_unused_fd, int, if (_T >= 0) put_unused_fd(_T), * * f = dentry_open(&path, O_RDONLY, current_cred()); * if (IS_ERR(f)) - * return PTR_ERR(fd); + * return PTR_ERR(f); * * fd_install(fd, f); * return take_fd(fd); diff --git a/include/linux/fs.h b/include/linux/fs.h index fd34b5755c0b52988a7131493cd56fa8ea640ad9..fb0426f349fc5c93f13e8165418448ea560d8db1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,6 +2392,9 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. * + * I_LRU_ISOLATING Inode is pinned being isolated from LRU without holding + * i_count. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2415,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) #define I_PINNING_NETFS_WB (1 << 18) +#define __I_LRU_ISOLATING 19 +#define I_LRU_ISOLATING (1 << __I_LRU_ISOLATING) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index c9bf68c239a01107dfd7f055a3f02897013d312e..45bf05ad5c53a1a9390bd1e8aeb9ff96dd848664 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -944,10 +944,37 @@ static inline bool htlb_allow_alloc_fallback(int reason) static inline spinlock_t *huge_pte_lockptr(struct hstate *h, struct mm_struct *mm, pte_t *pte) { - if (huge_page_size(h) == PMD_SIZE) + const unsigned long size = huge_page_size(h); + + VM_WARN_ON(size == PAGE_SIZE); + + /* + * hugetlb must use the exact same PT locks as core-mm page table + * walkers would. When modifying a PTE table, hugetlb must take the + * PTE PT lock, when modifying a PMD table, hugetlb must take the PMD + * PT lock etc. + * + * The expectation is that any hugetlb folio smaller than a PMD is + * always mapped into a single PTE table and that any hugetlb folio + * smaller than a PUD (but at least as big as a PMD) is always mapped + * into a single PMD table. + * + * If that does not hold for an architecture, then that architecture + * must disable split PT locks such that all *_lockptr() functions + * will give us the same result: the per-MM PT lock. + * + * Note that with e.g., CONFIG_PGTABLE_LEVELS=2 where + * PGDIR_SIZE==P4D_SIZE==PUD_SIZE==PMD_SIZE, we'd use pud_lockptr() + * and core-mm would use pmd_lockptr(). However, in such configurations + * split PMD locks are disabled -- they don't make sense on a single + * PGDIR page table -- and the end result is the same. + */ + if (size >= PUD_SIZE) + return pud_lockptr(mm, (pud_t *) pte); + else if (size >= PMD_SIZE || IS_ENABLED(CONFIG_HIGHPTE)) return pmd_lockptr(mm, (pmd_t *) pte); - VM_BUG_ON(huge_page_size(h) == PAGE_SIZE); - return &mm->page_table_lock; + /* pte_alloc_huge() only applies with !CONFIG_HIGHPTE */ + return ptep_lockptr(mm, pte); } #ifndef hugepages_supported diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 07e33bbc9256dd04ca9e60921c7663cd22845fe7..377def4972985cddb9ee6ee223b2cd3521346b02 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -1066,7 +1066,7 @@ static inline int of_i2c_get_board_info(struct device *dev, struct acpi_resource; struct acpi_resource_i2c_serialbus; -#if IS_ENABLED(CONFIG_ACPI) +#if IS_REACHABLE(CONFIG_ACPI) && IS_REACHABLE(CONFIG_I2C) bool i2c_acpi_get_i2c_resource(struct acpi_resource *ares, struct acpi_resource_i2c_serialbus **i2c); int i2c_acpi_client_count(struct acpi_device *adev); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4d47f2c33311850173a1871c0c821fa15c7f01bb..04cbdae0052eb23cf6c080d2a01b3fd028f15e48 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -795,8 +795,6 @@ extern int iommu_attach_device(struct iommu_domain *domain, struct device *dev); extern void iommu_detach_device(struct iommu_domain *domain, struct device *dev); -extern int iommu_sva_unbind_gpasid(struct iommu_domain *domain, - struct device *dev, ioasid_t pasid); extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 689e8be873a755a24057985389f5c3019cfc990d..b23c6d48392f7c4e37252bc19de9aacfe6759b5f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,13 @@ static inline bool kvm_arch_has_private_mem(struct kvm *kvm) } #endif +#ifndef kvm_arch_has_readonly_mem +static inline bool kvm_arch_has_readonly_mem(struct kvm *kvm) +{ + return IS_ENABLED(CONFIG_HAVE_KVM_READONLY_MEM); +} +#endif + struct kvm_memslots { u64 generation; atomic_long_t last_used_slot; @@ -2414,7 +2421,7 @@ static inline unsigned long kvm_get_memory_attributes(struct kvm *kvm, gfn_t gfn } bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs); + unsigned long mask, unsigned long attrs); bool kvm_arch_pre_set_memory_attributes(struct kvm *kvm, struct kvm_gfn_range *range); bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, @@ -2445,11 +2452,11 @@ static inline int kvm_gmem_get_pfn(struct kvm *kvm, } #endif /* CONFIG_KVM_PRIVATE_MEM */ -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE int kvm_arch_gmem_prepare(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, int max_order); -bool kvm_arch_gmem_prepare_needed(struct kvm *kvm); #endif +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM /** * kvm_gmem_populate() - Populate/prepare a GPA range with guest data * @@ -2476,8 +2483,9 @@ typedef int (*kvm_gmem_populate_cb)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn, long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque); +#endif -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE void kvm_arch_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end); #endif diff --git a/include/linux/minmax.h b/include/linux/minmax.h index 9c2848abc80496de84a23e2805163550ecd84599..98008dd92153db10c672155bca93201ffabee994 100644 --- a/include/linux/minmax.h +++ b/include/linux/minmax.h @@ -26,19 +26,63 @@ #define __typecheck(x, y) \ (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) -/* is_signed_type() isn't a constexpr for pointer types */ -#define __is_signed(x) \ - __builtin_choose_expr(__is_constexpr(is_signed_type(typeof(x))), \ - is_signed_type(typeof(x)), 0) +/* + * __sign_use for integer expressions: + * bit #0 set if ok for unsigned comparisons + * bit #1 set if ok for signed comparisons + * + * In particular, statically non-negative signed integer + * expressions are ok for both. + * + * NOTE! Unsigned types smaller than 'int' are implicitly + * converted to 'int' in expressions, and are accepted for + * signed conversions for now. This is debatable. + * + * Note that 'x' is the original expression, and 'ux' is + * the unique variable that contains the value. + * + * We use 'ux' for pure type checking, and 'x' for when + * we need to look at the value (but without evaluating + * it for side effects! Careful to only ever evaluate it + * with sizeof() or __builtin_constant_p() etc). + * + * Pointers end up being checked by the normal C type + * rules at the actual comparison, and these expressions + * only need to be careful to not cause warnings for + * pointer use. + */ +#define __signed_type_use(x,ux) (2+__is_nonneg(x,ux)) +#define __unsigned_type_use(x,ux) (1+2*(sizeof(ux)<4)) +#define __sign_use(x,ux) (is_signed_type(typeof(ux))? \ + __signed_type_use(x,ux):__unsigned_type_use(x,ux)) -/* True for a non-negative signed int constant */ -#define __is_noneg_int(x) \ - (__builtin_choose_expr(__is_constexpr(x) && __is_signed(x), x, -1) >= 0) +/* + * To avoid warnings about casting pointers to integers + * of different sizes, we need that special sign type. + * + * On 64-bit we can just always use 'long', since any + * integer or pointer type can just be cast to that. + * + * This does not work for 128-bit signed integers since + * the cast would truncate them, but we do not use s128 + * types in the kernel (we do use 'u128', but they will + * be handled by the !is_signed_type() case). + * + * NOTE! The cast is there only to avoid any warnings + * from when values that aren't signed integer types. + */ +#ifdef CONFIG_64BIT + #define __signed_type(ux) long +#else + #define __signed_type(ux) typeof(__builtin_choose_expr(sizeof(ux)>4,1LL,1L)) +#endif +#define __is_nonneg(x,ux) statically_true((__signed_type(ux))(x)>=0) + +#define __types_ok(x,y,ux,uy) \ + (__sign_use(x,ux) & __sign_use(y,uy)) -#define __types_ok(x, y) \ - (__is_signed(x) == __is_signed(y) || \ - __is_signed((x) + 0) == __is_signed((y) + 0) || \ - __is_noneg_int(x) || __is_noneg_int(y)) +#define __types_ok3(x,y,z,ux,uy,uz) \ + (__sign_use(x,ux) & __sign_use(y,uy) & __sign_use(z,uz)) #define __cmp_op_min < #define __cmp_op_max > @@ -51,34 +95,31 @@ #define __cmp_once(op, type, x, y) \ __cmp_once_unique(op, type, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) -#define __careful_cmp_once(op, x, y) ({ \ - static_assert(__types_ok(x, y), \ - #op "(" #x ", " #y ") signedness error, fix types or consider u" #op "() before " #op "_t()"); \ - __cmp_once(op, __auto_type, x, y); }) +#define __careful_cmp_once(op, x, y, ux, uy) ({ \ + __auto_type ux = (x); __auto_type uy = (y); \ + BUILD_BUG_ON_MSG(!__types_ok(x,y,ux,uy), \ + #op"("#x", "#y") signedness error"); \ + __cmp(op, ux, uy); }) -#define __careful_cmp(op, x, y) \ - __builtin_choose_expr(__is_constexpr((x) - (y)), \ - __cmp(op, x, y), __careful_cmp_once(op, x, y)) +#define __careful_cmp(op, x, y) \ + __careful_cmp_once(op, x, y, __UNIQUE_ID(x_), __UNIQUE_ID(y_)) #define __clamp(val, lo, hi) \ ((val) >= (hi) ? (hi) : ((val) <= (lo) ? (lo) : (val))) -#define __clamp_once(val, lo, hi, unique_val, unique_lo, unique_hi) ({ \ - typeof(val) unique_val = (val); \ - typeof(lo) unique_lo = (lo); \ - typeof(hi) unique_hi = (hi); \ +#define __clamp_once(val, lo, hi, uval, ulo, uhi) ({ \ + __auto_type uval = (val); \ + __auto_type ulo = (lo); \ + __auto_type uhi = (hi); \ static_assert(__builtin_choose_expr(__is_constexpr((lo) > (hi)), \ (lo) <= (hi), true), \ "clamp() low limit " #lo " greater than high limit " #hi); \ - static_assert(__types_ok(val, lo), "clamp() 'lo' signedness error"); \ - static_assert(__types_ok(val, hi), "clamp() 'hi' signedness error"); \ - __clamp(unique_val, unique_lo, unique_hi); }) + BUILD_BUG_ON_MSG(!__types_ok3(val,lo,hi,uval,ulo,uhi), \ + "clamp("#val", "#lo", "#hi") signedness error"); \ + __clamp(uval, ulo, uhi); }) -#define __careful_clamp(val, lo, hi) ({ \ - __builtin_choose_expr(__is_constexpr((val) - (lo) + (hi)), \ - __clamp(val, lo, hi), \ - __clamp_once(val, lo, hi, __UNIQUE_ID(__val), \ - __UNIQUE_ID(__lo), __UNIQUE_ID(__hi))); }) +#define __careful_clamp(val, lo, hi) \ + __clamp_once(val, lo, hi, __UNIQUE_ID(v_), __UNIQUE_ID(l_), __UNIQUE_ID(h_)) /** * min - return minimum of two values of the same or compatible types @@ -111,13 +152,20 @@ #define umax(x, y) \ __careful_cmp(max, (x) + 0u + 0ul + 0ull, (y) + 0u + 0ul + 0ull) +#define __careful_op3(op, x, y, z, ux, uy, uz) ({ \ + __auto_type ux = (x); __auto_type uy = (y);__auto_type uz = (z);\ + BUILD_BUG_ON_MSG(!__types_ok3(x,y,z,ux,uy,uz), \ + #op"3("#x", "#y", "#z") signedness error"); \ + __cmp(op, ux, __cmp(op, uy, uz)); }) + /** * min3 - return minimum of three values * @x: first value * @y: second value * @z: third value */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) +#define min3(x, y, z) \ + __careful_op3(min, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * max3 - return maximum of three values @@ -125,7 +173,8 @@ * @y: second value * @z: third value */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) +#define max3(x, y, z) \ + __careful_op3(max, x, y, z, __UNIQUE_ID(x_), __UNIQUE_ID(y_), __UNIQUE_ID(z_)) /** * min_not_zero - return the minimum that is _not_ zero, unless both are zero @@ -277,6 +326,8 @@ static inline bool in_range32(u32 val, u32 start, u32 len) * Use these carefully: no type checking, and uses the arguments * multiple times. Use for obvious constants only. */ +#define MIN(a,b) __cmp(min,a,b) +#define MAX(a,b) __cmp(max,a,b) #define MIN_T(type,a,b) __cmp(min,(type)(a),(type)(b)) #define MAX_T(type,a,b) __cmp(max,(type)(a),(type)(b)) diff --git a/include/linux/mm.h b/include/linux/mm.h index c4b238a20b76e906c4a29fe58cb76cb5e95a56a3..6549d0979b28f93332db0457bfa70942ff2cf43a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2920,6 +2920,13 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + BUILD_BUG_ON(IS_ENABLED(CONFIG_HIGHPTE)); + BUILD_BUG_ON(MAX_PTRS_PER_PTE * sizeof(pte_t) > PAGE_SIZE); + return ptlock_ptr(virt_to_ptdesc(pte)); +} + static inline bool ptlock_init(struct ptdesc *ptdesc) { /* @@ -2944,6 +2951,10 @@ static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) { return &mm->page_table_lock; } +static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) +{ + return &mm->page_table_lock; +} static inline void ptlock_cache_init(void) {} static inline bool ptlock_init(struct ptdesc *ptdesc) { return true; } static inline void ptlock_free(struct ptdesc *ptdesc) {} diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 41458892bc8a346d8d51eca6fc24d5ded4e0b682..1dc6248feb83246d39e85425ba49725b7ca9269e 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -220,8 +220,6 @@ enum node_stat_item { PGDEMOTE_KSWAPD, PGDEMOTE_DIRECT, PGDEMOTE_KHUGEPAGED, - NR_MEMMAP, /* page metadata allocated through buddy allocator */ - NR_MEMMAP_BOOT, /* page metadata allocated through boot allocator */ NR_VM_NODE_STAT_ITEMS }; diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 5d0288938cc2dc33579ae70bf462807340cf3a72..983816608f15d7b3c3dc0513bbcc0490f7c45c77 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -73,8 +73,6 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ #define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */ -#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark - * write to cache on read */ }; /* @@ -269,7 +267,6 @@ struct netfs_io_request { #define NETFS_RREQ_DONT_UNLOCK_FOLIOS 3 /* Don't unlock the folios on completion */ #define NETFS_RREQ_FAILED 4 /* The request failed */ #define NETFS_RREQ_IN_PROGRESS 5 /* Unlocked when the request completes */ -#define NETFS_RREQ_WRITE_TO_CACHE 7 /* Need to write to the cache */ #define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */ #define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */ #define NETFS_RREQ_BLOCKED 10 /* We blocked */ diff --git a/include/linux/panic.h b/include/linux/panic.h index 3130e0b5116b0367c00ca940cb9aa95e0e95de0d..54d90b6c5f47bd6978654b3f61261943a37e2052 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -16,6 +16,7 @@ extern void oops_enter(void); extern void oops_exit(void); extern bool oops_may_print(void); +extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; diff --git a/include/linux/pgalloc_tag.h b/include/linux/pgalloc_tag.h index 18cd0c0c73d93d9c80ba46b708763dad3c15752e..207f0c83c8e9750ac7050cc488f1618a69c4fd5c 100644 --- a/include/linux/pgalloc_tag.h +++ b/include/linux/pgalloc_tag.h @@ -43,6 +43,18 @@ static inline void put_page_tag_ref(union codetag_ref *ref) page_ext_put(page_ext_from_codetag_ref(ref)); } +static inline void clear_page_tag_ref(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } +} + static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) { @@ -126,6 +138,7 @@ static inline void pgalloc_tag_sub_pages(struct alloc_tag *tag, unsigned int nr) static inline union codetag_ref *get_page_tag_ref(struct page *page) { return NULL; } static inline void put_page_tag_ref(union codetag_ref *ref) {} +static inline void clear_page_tag_ref(struct page *page) {} static inline void pgalloc_tag_add(struct page *page, struct task_struct *task, unsigned int nr) {} static inline void pgalloc_tag_sub(struct page *page, unsigned int nr) {} diff --git a/include/linux/profile.h b/include/linux/profile.h index 2fb487f61d12abf8e8a881467cf598e646995fa0..3f53cdb0c27c1a2be13d63199f2949a2033f174c 100644 --- a/include/linux/profile.h +++ b/include/linux/profile.h @@ -10,7 +10,6 @@ #define CPU_PROFILING 1 #define SCHED_PROFILING 2 -#define SLEEP_PROFILING 3 #define KVM_PROFILING 4 struct proc_dir_entry; diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 59b3b752394d3cabd34e40f439eae513e4cdc2a5..35f039ecb2725618ca098e3515c6e19e2aece3ee 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -266,12 +266,12 @@ bool __refcount_sub_and_test(int i, refcount_t *r, int *oldp) if (oldp) *oldp = old; - if (old == i) { + if (old > 0 && old == i) { smp_acquire__after_ctrl_dep(); return true; } - if (unlikely(old < 0 || old - i < 0)) + if (unlikely(old <= 0 || old - i < 0)) refcount_warn_saturate(r, REFCOUNT_SUB_UAF); return false; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 96d2140b471ede371061cded9c0c406b7653501d..fd35d4ec12e1d5a7b82e09c1a932418491c41630 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -193,7 +193,6 @@ void ring_buffer_set_clock(struct trace_buffer *buffer, void ring_buffer_set_time_stamp_abs(struct trace_buffer *buffer, bool abs); bool ring_buffer_time_stamp_abs(struct trace_buffer *buffer); -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu); size_t ring_buffer_nr_dirty_pages(struct trace_buffer *buffer, int cpu); struct buffer_data_read_page; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e4f3f3d30a039060238312b77df65889ad0af7e6..d47d5f14ff992f74bd2962dbca9d89d8256109b3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -902,12 +902,29 @@ extern int devm_spi_register_controller(struct device *dev, struct spi_controller *ctlr); extern void spi_unregister_controller(struct spi_controller *ctlr); -#if IS_ENABLED(CONFIG_ACPI) +#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SPI_MASTER) extern struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev); extern struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, struct acpi_device *adev, int index); int acpi_spi_count_resources(struct acpi_device *adev); +#else +static inline struct spi_controller *acpi_spi_find_controller_by_adev(struct acpi_device *adev) +{ + return NULL; +} + +static inline struct spi_device *acpi_spi_device_alloc(struct spi_controller *ctlr, + struct acpi_device *adev, + int index) +{ + return ERR_PTR(-ENODEV); +} + +static inline int acpi_spi_count_resources(struct acpi_device *adev) +{ + return 0; +} #endif /* diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 25fbf960b474bd4d1aad8c14a544dc74c8df3b0a..b86ddca46b9e81de578aa5dbc9eecccb993117e1 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -55,6 +55,7 @@ enum thermal_notify_event { THERMAL_TZ_BIND_CDEV, /* Cooling dev is bind to the thermal zone */ THERMAL_TZ_UNBIND_CDEV, /* Cooling dev is unbind from the thermal zone */ THERMAL_INSTANCE_WEIGHT_CHANGED, /* Thermal instance weight changed */ + THERMAL_TZ_RESUME, /* Thermal zone is resuming after system sleep */ }; /** diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 9df3e2973626bcb4f16fd5cccb3908c31e4d7b56..42bedcddd5113e96920a1c5f6baab88645242ef9 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -680,7 +680,7 @@ struct trace_event_file { * caching and such. Which is mostly OK ;-) */ unsigned long flags; - atomic_t ref; /* ref count for opened files */ + refcount_t ref; /* ref count for opened files */ atomic_t sm_ref; /* soft-mode reference counter */ atomic_t tm_ref; /* trigger-mode reference counter */ }; @@ -880,7 +880,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ecc5cb7b8c91ed6f7b663d1407d6b2927e40953f..4b16844c6bc29a3c29f4236b872b3f4f5291efe5 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -10,6 +10,7 @@ #include #include #include +#include /** * struct virtqueue - a queue to register buffers for sending or receiving. @@ -109,6 +110,8 @@ struct virtio_admin_cmd { __le64 group_member_id; struct scatterlist *data_sg; struct scatterlist *result_sg; + struct completion completion; + int ret; }; /** diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index ab4b9a3fef6ba12fecf83cc89f120f4662f2ab9c..169c7d367facb36dcabf9596068580ea8b8516c7 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -104,8 +104,6 @@ struct virtqueue_info { * Returns 0 on success or error status * If disable_vq_and_reset is set, then enable_vq_after_reset must also be * set. - * @create_avq: create admin virtqueue resource. - * @destroy_avq: destroy admin virtqueue resource. */ struct virtio_config_ops { void (*get)(struct virtio_device *vdev, unsigned offset, @@ -133,8 +131,6 @@ struct virtio_config_ops { struct virtio_shm_region *region, u8 id); int (*disable_vq_and_reset)(struct virtqueue *vq); int (*enable_vq_after_reset)(struct virtqueue *vq); - int (*create_avq)(struct virtio_device *vdev); - void (*destroy_avq)(struct virtio_device *vdev); }; /* If driver didn't advertise the feature, it will never appear. */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index d1d7825318c32d51dae4264bf97b498d5f618b03..6c395a2600e8d18ec6ef73287c24b53d2f98515e 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -56,7 +56,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, unsigned int thlen = 0; unsigned int p_off = 0; unsigned int ip_proto; - u64 ret, remainder, gso_size; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -99,16 +98,6 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, u32 off = __virtio16_to_cpu(little_endian, hdr->csum_offset); u32 needed = start + max_t(u32, thlen, off + sizeof(__sum16)); - if (hdr->gso_size) { - gso_size = __virtio16_to_cpu(little_endian, hdr->gso_size); - ret = div64_u64_rem(skb->len, gso_size, &remainder); - if (!(ret && (hdr->gso_size > needed) && - ((remainder > needed) || (remainder == 0)))) { - return -EINVAL; - } - skb_shinfo(skb)->tx_flags |= SKBFL_SHARED_FRAG; - } - if (!pskb_may_pull(skb, needed)) return -EINVAL; @@ -182,6 +171,11 @@ retry: if (gso_type != SKB_GSO_UDP_L4) return -EINVAL; break; + case SKB_GSO_TCPV4: + case SKB_GSO_TCPV6: + if (skb->csum_offset != offsetof(struct tcphdr, check)) + return -EINVAL; + break; } /* Kernel has a special handling for GSO_BY_FRAGS. */ diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 23cd179420363cb8ec510e4691ee32b3d8d93fac..9eb77c9007e6231bdc8753f944455ccf26d39c87 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -34,10 +34,13 @@ struct reclaim_stat { unsigned nr_lazyfree_fail; }; -enum writeback_stat_item { +/* Stat data for system wide items */ +enum vm_stat_item { NR_DIRTY_THRESHOLD, NR_DIRTY_BG_THRESHOLD, - NR_VM_WRITEBACK_STAT_ITEMS, + NR_MEMMAP_PAGES, /* page metadata allocated through buddy allocator */ + NR_MEMMAP_BOOT_PAGES, /* page metadata allocated through boot allocator */ + NR_VM_STAT_ITEMS, }; #ifdef CONFIG_VM_EVENT_COUNTERS @@ -514,21 +517,13 @@ static inline const char *lru_list_name(enum lru_list lru) return node_stat_name(NR_LRU_BASE + lru) + 3; // skip "nr_" } -static inline const char *writeback_stat_name(enum writeback_stat_item item) -{ - return vmstat_text[NR_VM_ZONE_STAT_ITEMS + - NR_VM_NUMA_EVENT_ITEMS + - NR_VM_NODE_STAT_ITEMS + - item]; -} - #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) static inline const char *vm_event_name(enum vm_event_item item) { return vmstat_text[NR_VM_ZONE_STAT_ITEMS + NR_VM_NUMA_EVENT_ITEMS + NR_VM_NODE_STAT_ITEMS + - NR_VM_WRITEBACK_STAT_ITEMS + + NR_VM_STAT_ITEMS + item]; } #endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */ @@ -625,7 +620,6 @@ static inline void lruvec_stat_sub_folio(struct folio *folio, lruvec_stat_mod_folio(folio, idx, -folio_nr_pages(folio)); } -void __meminit mod_node_early_perpage_metadata(int nid, long delta); -void __meminit store_early_perpage_metadata(void); - +void memmap_boot_pages_add(long delta); +void memmap_pages_add(long delta); #endif /* _LINUX_VMSTAT_H */ diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h index 535701efc1e5cec874b8b94088020b1199c8b8c0..24d970f7a4fa22d289e07c1bbaa0dd8bc7b972bb 100644 --- a/include/net/af_vsock.h +++ b/include/net/af_vsock.h @@ -230,8 +230,12 @@ struct vsock_tap { int vsock_add_tap(struct vsock_tap *vt); int vsock_remove_tap(struct vsock_tap *vt); void vsock_deliver_tap(struct sk_buff *build_skb(void *opaque), void *opaque); +int __vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags); int vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags); int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index e372a88e8c3f6a9a640f791ad4ddcf9be25a7947..d1d073089f384e20eeb269ab2d5192f6c18d6939 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -206,14 +206,17 @@ enum { */ HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, - /* When this quirk is set, the controller has validated that - * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are - * valid. This mechanism is necessary as many controllers have - * been seen has having trouble initiating a connectable - * advertisement despite the state combination being reported as - * supported. + /* When this quirk is set, the LE states reported through the + * HCI_LE_READ_SUPPORTED_STATES are invalid/broken. + * + * This mechanism is necessary as many controllers have been seen has + * having trouble initiating a connectable advertisement despite the + * state combination being reported as supported. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. */ - HCI_QUIRK_VALID_LE_STATES, + HCI_QUIRK_BROKEN_LE_STATES, /* When this quirk is set, then erroneous data reporting * is ignored. This is mainly due to the fact that the HCI diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 31020891fc68cde232075f41ba8f3df26e1067ac..e449dba698f35e93bacb94add158a2bdc2aa827b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -825,7 +825,7 @@ extern struct mutex hci_cb_list_lock; } while (0) #define hci_dev_le_state_simultaneous(hdev) \ - (test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) && \ + (!test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) && \ (hdev->le_states[4] & 0x08) && /* Central */ \ (hdev->le_states[4] & 0x40) && /* Peripheral */ \ (hdev->le_states[3] & 0x10)) /* Simultaneous */ diff --git a/include/net/dsa.h b/include/net/dsa.h index b06f97ae3da1b5a3d66531c5ce5293dc58eff4e1..d7a6c2930277ea106d9c33b7cf29f08436b5333b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -403,14 +403,18 @@ struct dsa_switch { */ u32 configure_vlan_while_not_filtering:1; - /* If the switch driver always programs the CPU port as egress tagged - * despite the VLAN configuration indicating otherwise, then setting - * @untag_bridge_pvid will force the DSA receive path to pop the - * bridge's default_pvid VLAN tagged frames to offer a consistent - * behavior between a vlan_filtering=0 and vlan_filtering=1 bridge - * device. + /* Pop the default_pvid of VLAN-unaware bridge ports from tagged frames. + * DEPRECATED: Do NOT set this field in new drivers. Instead look at + * the dsa_software_vlan_untag() comments. */ u32 untag_bridge_pvid:1; + /* Pop the default_pvid of VLAN-aware bridge ports from tagged frames. + * Useful if the switch cannot preserve the VLAN tag as seen on the + * wire for user port ingress, and chooses to send all frames as + * VLAN-tagged to the CPU, including those which were originally + * untagged. + */ + u32 untag_vlan_aware_bridge_pvid:1; /* Let DSA manage the FDB entries towards the * CPU, based on the software bridge database. diff --git a/include/net/kcm.h b/include/net/kcm.h index 90279e5e09a5c0251aff6d1a72c5947bc55060f6..441e993be634ce58a450a923c7b5259736c1b21b 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -70,6 +70,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + struct mutex tx_mutex; u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h index 6439fd8b437b66ac2f704fe11cbb25c86fd525b6..7caa334f48880e500e0f889de9561ae724e5ddc7 100644 --- a/include/net/mana/mana.h +++ b/include/net/mana/mana.h @@ -275,6 +275,7 @@ struct mana_cq { /* NAPI data */ struct napi_struct napi; int work_done; + int work_done_since_doorbell; int budget; }; diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h index 45c40d200154dd075c785a055d37f9ec8f131456..8ecfb94049db5186f9d6dfc86c627ae66cea6e05 100644 --- a/include/scsi/scsi_cmnd.h +++ b/include/scsi/scsi_cmnd.h @@ -234,7 +234,7 @@ static inline sector_t scsi_get_lba(struct scsi_cmnd *scmd) static inline unsigned int scsi_logical_block_count(struct scsi_cmnd *scmd) { - unsigned int shift = ilog2(scmd->device->sector_size) - SECTOR_SHIFT; + unsigned int shift = ilog2(scmd->device->sector_size); return blk_rq_bytes(scsi_cmd_to_rq(scmd)) >> shift; } diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 6a37b29f4b4c7af3c8a0fe9fe4ed332b8a3c3cbb..462c653e1017463620475d0e14b64cd5f6b4f794 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -813,6 +813,9 @@ struct ocelot { const u32 *const *map; struct list_head stats_regions; + spinlock_t inj_lock; + spinlock_t xtr_lock; + u32 pool_size[OCELOT_SB_NUM][OCELOT_SB_POOL_NUM]; int packet_buffer_size; int num_frame_refs; @@ -966,10 +969,17 @@ void __ocelot_target_write_ix(struct ocelot *ocelot, enum ocelot_target target, u32 val, u32 reg, u32 offset); /* Packet I/O */ +void ocelot_lock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_inj_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp(struct ocelot *ocelot, int grp); +void ocelot_lock_xtr_grp_bh(struct ocelot *ocelot, int grp); +void ocelot_unlock_xtr_grp_bh(struct ocelot *ocelot, int grp); bool ocelot_can_inject(struct ocelot *ocelot, int grp); void ocelot_port_inject_frame(struct ocelot *ocelot, int port, int grp, u32 rew_op, struct sk_buff *skb); -void ocelot_ifh_port_set(void *ifh, int port, u32 rew_op, u32 vlan_tag); +void ocelot_ifh_set_basic(void *ifh, struct ocelot *ocelot, int port, + u32 rew_op, struct sk_buff *skb); int ocelot_xtr_poll_frame(struct ocelot *ocelot, int grp, struct sk_buff **skb); void ocelot_drain_cpu_queue(struct ocelot *ocelot, int grp); void ocelot_ptp_rx_timestamp(struct ocelot *ocelot, struct sk_buff *skb, diff --git a/include/soc/mscc/ocelot_vcap.h b/include/soc/mscc/ocelot_vcap.h index c601a4598b0da86f51e8cf4a6fbcea67a999fe11..eb19668a06db17abdcf1d25c252b7d50bf723b20 100644 --- a/include/soc/mscc/ocelot_vcap.h +++ b/include/soc/mscc/ocelot_vcap.h @@ -13,6 +13,7 @@ */ #define OCELOT_VCAP_ES0_TAG_8021Q_RXVLAN(ocelot, port, upstream) ((upstream) << 16 | (port)) #define OCELOT_VCAP_IS1_TAG_8021Q_TXVLAN(ocelot, port) (port) +#define OCELOT_VCAP_IS1_VLAN_RECLASSIFY(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_TAG_8021Q_TXVLAN(ocelot, port) (port) #define OCELOT_VCAP_IS2_MRP_REDIRECT(ocelot, port) ((ocelot)->num_phys_ports + (port)) #define OCELOT_VCAP_IS2_MRP_TRAP(ocelot) ((ocelot)->num_phys_ports * 2) @@ -499,6 +500,7 @@ struct ocelot_vcap_key_vlan { struct ocelot_vcap_u8 pcp; /* PCP (3 bit) */ enum ocelot_vcap_bit dei; /* DEI */ enum ocelot_vcap_bit tagged; /* Tagged/untagged frame */ + enum ocelot_vcap_bit tpid; }; struct ocelot_vcap_key_etype { diff --git a/include/sound/cs35l56.h b/include/sound/cs35l56.h index a6aa112e57416ce04b4e9ada51b39b52a2e8de68..a51acefa785f72980f46c2deda5b76dbb4751fab 100644 --- a/include/sound/cs35l56.h +++ b/include/sound/cs35l56.h @@ -277,6 +277,11 @@ static inline int cs35l56_force_sync_asp1_registers_from_cache(struct cs35l56_ba return 0; } +static inline bool cs35l56_is_otp_register(unsigned int reg) +{ + return (reg >> 16) == 3; +} + extern struct regmap_config cs35l56_regmap_i2c; extern struct regmap_config cs35l56_regmap_spi; extern struct regmap_config cs35l56_regmap_sdw; diff --git a/include/sound/soc-component.h b/include/sound/soc-component.h index ceca69b46a821416ad095cb5e36a1e16a77eb048..bf2e381cd124b657af5111257439dbc0db1d9ed8 100644 --- a/include/sound/soc-component.h +++ b/include/sound/soc-component.h @@ -462,6 +462,11 @@ int snd_soc_component_force_enable_pin_unlocked( const char *pin); /* component controls */ +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl); +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl); int snd_soc_component_notify_control(struct snd_soc_component *component, const char * const ctl); diff --git a/include/sound/ump_convert.h b/include/sound/ump_convert.h index 28c364c63245d035004d5b6210a4181dbfc135f6..d099ae27f8491afbbcc65dea5f22149d448b7a92 100644 --- a/include/sound/ump_convert.h +++ b/include/sound/ump_convert.h @@ -13,6 +13,7 @@ struct ump_cvt_to_ump_bank { unsigned char cc_nrpn_msb, cc_nrpn_lsb; unsigned char cc_data_msb, cc_data_lsb; unsigned char cc_bank_msb, cc_bank_lsb; + bool cc_data_msb_set, cc_data_lsb_set; }; /* context for converting from MIDI1 byte stream to UMP packet */ diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 246c0fbd582ee308bf35caaff97b796631ff86a3..0a523023bdcc38e3c20a0d28a58a836381cb34b0 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2383,6 +2383,14 @@ DEFINE_EVENT(btrfs__space_info_update, update_bytes_pinned, TP_ARGS(fs_info, sinfo, old, diff) ); +DEFINE_EVENT(btrfs__space_info_update, update_bytes_zone_unusable, + + TP_PROTO(const struct btrfs_fs_info *fs_info, + const struct btrfs_space_info *sinfo, u64 old, s64 diff), + + TP_ARGS(fs_info, sinfo, old, diff) +); + DECLARE_EVENT_CLASS(btrfs_raid56_bio, TP_PROTO(const struct btrfs_raid_bio *rbio, diff --git a/include/trace/events/mptcp.h b/include/trace/events/mptcp.h index 09e72215b9f9bb53ec363d7690e9b87a09d172cb..085b749cdd97e6598dd1c66631f45eda3b73ee14 100644 --- a/include/trace/events/mptcp.h +++ b/include/trace/events/mptcp.h @@ -34,7 +34,7 @@ TRACE_EVENT(mptcp_subflow_get_send, struct sock *ssk; __entry->active = mptcp_subflow_active(subflow); - __entry->backup = subflow->backup; + __entry->backup = subflow->backup || subflow->request_bkup; if (subflow->tcp_sock && sk_fullsock(subflow->tcp_sock)) __entry->free = sk_stream_memory_free(subflow->tcp_sock); diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h index da23484268dfcfd6deeee9b9c40447b034d775fc..606b4a0f92dae2763ec9f389aa3ae96ced962f81 100644 --- a/include/trace/events/netfs.h +++ b/include/trace/events/netfs.h @@ -51,6 +51,7 @@ EM(netfs_rreq_trace_resubmit, "RESUBMT") \ EM(netfs_rreq_trace_set_pause, "PAUSE ") \ EM(netfs_rreq_trace_unlock, "UNLOCK ") \ + EM(netfs_rreq_trace_unlock_pgpriv2, "UNLCK-2") \ EM(netfs_rreq_trace_unmark, "UNMARK ") \ EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \ @@ -145,6 +146,7 @@ EM(netfs_folio_trace_clear_g, "clear-g") \ EM(netfs_folio_trace_clear_s, "clear-s") \ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \ + EM(netfs_folio_trace_end_copy, "end-copy") \ EM(netfs_folio_trace_filled_gaps, "filled-gaps") \ EM(netfs_folio_trace_kill, "kill") \ EM(netfs_folio_trace_kill_cc, "kill-cc") \ diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ba2d6a0e41ccf78fb6fe62ad39dd8452edec90f0..a96a985c49b3d76338d6a9ec55ff150fb2bf93c1 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -2277,6 +2277,42 @@ DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_wait_on); DEFINE_CLIENT_DEVICE_EVENT(rpcrdma_client_remove_one_done); +DECLARE_EVENT_CLASS(rpcrdma_client_register_class, + TP_PROTO( + const struct ib_device *device, + const struct rpcrdma_notification *rn + ), + + TP_ARGS(device, rn), + + TP_STRUCT__entry( + __string(name, device->name) + __field(void *, callback) + __field(u32, index) + ), + + TP_fast_assign( + __assign_str(name); + __entry->callback = rn->rn_done; + __entry->index = rn->rn_index; + ), + + TP_printk("device=%s index=%u done callback=%pS\n", + __get_str(name), __entry->index, __entry->callback + ) +); + +#define DEFINE_CLIENT_REGISTER_EVENT(name) \ + DEFINE_EVENT(rpcrdma_client_register_class, name, \ + TP_PROTO( \ + const struct ib_device *device, \ + const struct rpcrdma_notification *rn \ + ), \ + TP_ARGS(device, rn)) + +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_register); +DEFINE_CLIENT_REGISTER_EVENT(rpcrdma_client_unregister); + #endif /* _TRACE_RPCRDMA_H */ #include diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 985a262d0f9eee32cfb0b55b8a331c379b015403..5bf6148cac2b93e36f362236545e2e6eb473d5ff 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -841,11 +841,8 @@ __SYSCALL(__NR_lsm_list_modules, sys_lsm_list_modules) #define __NR_mseal 462 __SYSCALL(__NR_mseal, sys_mseal) -#define __NR_uretprobe 463 -__SYSCALL(__NR_uretprobe, sys_uretprobe) - #undef __NR_syscalls -#define __NR_syscalls 464 +#define __NR_syscalls 463 /* * 32 bit systems traditionally used different diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a863f70f1be596edf4e57089edf117..db232a25189eba9feb71c2e43e800d1e4b4b41b9 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1590,10 +1590,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. */ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 2aaf7ee256ac42d9287ea948dedb5b81325cfbcb..adc2524fd8e3dca4be2a5967eee3bd1bcebacd9a 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -421,7 +421,7 @@ enum io_uring_msg_ring_flags { * IO completion data structure (Completion Queue Entry) */ struct io_uring_cqe { - __u64 user_data; /* sqe->data submission passed back */ + __u64 user_data; /* sqe->user_data value passed back */ __s32 res; /* result code for this event */ __u32 flags; diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index b133211331f6a60e57e46f7c6bff650dc7d3c637..5fad3d0fcd70788de7c7cb67d4175c43574f0191 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -3,6 +3,7 @@ #define __LINUX_NSFS_H #include +#include #define NSIO 0xb7 @@ -16,7 +17,7 @@ /* Get owner UID (in the caller's user namespace) for a user namespace */ #define NS_GET_OWNER_UID _IO(NSIO, 0x4) /* Get the id for a mount namespace */ -#define NS_GET_MNTNS_ID _IO(NSIO, 0x5) +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) /* Translate pid from target pid namespace into the caller's pid namespace. */ #define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) /* Return thread-group leader id of pid in the callers pid namespace. */ diff --git a/include/uapi/linux/psp-sev.h b/include/uapi/linux/psp-sev.h index 2289b7c76c59aa946abd4375322fdb45ec456008..832c15d9155bdbb95827c3e8214da066efe0f55e 100644 --- a/include/uapi/linux/psp-sev.h +++ b/include/uapi/linux/psp-sev.h @@ -51,6 +51,7 @@ typedef enum { SEV_RET_INVALID_PLATFORM_STATE, SEV_RET_INVALID_GUEST_STATE, SEV_RET_INAVLID_CONFIG, + SEV_RET_INVALID_CONFIG = SEV_RET_INAVLID_CONFIG, SEV_RET_INVALID_LEN, SEV_RET_ALREADY_OWNED, SEV_RET_INVALID_CERTIFICATE, diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h index 91583690bddc5294e926d73b40d201f039474b28..f33d914d8f4699eddef9f9910ec2b30440c26e83 100644 --- a/include/uapi/misc/fastrpc.h +++ b/include/uapi/misc/fastrpc.h @@ -8,14 +8,11 @@ #define FASTRPC_IOCTL_ALLOC_DMA_BUFF _IOWR('R', 1, struct fastrpc_alloc_dma_buf) #define FASTRPC_IOCTL_FREE_DMA_BUFF _IOWR('R', 2, __u32) #define FASTRPC_IOCTL_INVOKE _IOWR('R', 3, struct fastrpc_invoke) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH _IO('R', 4) #define FASTRPC_IOCTL_INIT_CREATE _IOWR('R', 5, struct fastrpc_init_create) #define FASTRPC_IOCTL_MMAP _IOWR('R', 6, struct fastrpc_req_mmap) #define FASTRPC_IOCTL_MUNMAP _IOWR('R', 7, struct fastrpc_req_munmap) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_ATTACH_SNS _IO('R', 8) -/* This ioctl is only supported with secure device nodes */ #define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static) #define FASTRPC_IOCTL_MEM_MAP _IOWR('R', 10, struct fastrpc_mem_map) #define FASTRPC_IOCTL_MEM_UNMAP _IOWR('R', 11, struct fastrpc_mem_unmap) diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index a43b14276bc33dd5ca04dc94249578a3177a9b6f..0fd2aebac72862e789a54e12803a45077d86cdd7 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -676,6 +676,14 @@ enum ufshcd_quirks { * the standard best practice for managing keys). */ UFSHCD_QUIRK_KEYS_IN_PRDT = 1 << 24, + + /* + * This quirk indicates that the controller reports the value 1 (not + * supported) in the Legacy Single DoorBell Support (LSDBS) bit of the + * Controller Capabilities register although it supports the legacy + * single doorbell mode. + */ + UFSHCD_QUIRK_BROKEN_LSDBS_CAP = 1 << 25, }; enum ufshcd_caps { @@ -1109,6 +1117,7 @@ struct ufs_hba { bool ext_iid_sup; bool scsi_host_added; bool mcq_sup; + bool lsdb_sup; bool mcq_enabled; struct ufshcd_res_info res[RES_MAX]; void __iomem *mcq_base; diff --git a/include/ufs/ufshci.h b/include/ufs/ufshci.h index 38fe97971a65facdd67a62cfa32fbf48f8a7dbd7..9917c7743d808c49a67a5008793a44273862f176 100644 --- a/include/ufs/ufshci.h +++ b/include/ufs/ufshci.h @@ -77,6 +77,7 @@ enum { MASK_OUT_OF_ORDER_DATA_DELIVERY_SUPPORT = 0x02000000, MASK_UIC_DME_TEST_MODE_SUPPORT = 0x04000000, MASK_CRYPTO_SUPPORT = 0x10000000, + MASK_LSDB_SUPPORT = 0x20000000, MASK_MCQ_SUPPORT = 0x40000000, }; diff --git a/init/Kconfig b/init/Kconfig index a465ea9525bd590c2b60dd4a4a08c90015bf17f5..5783a0b8751726ad97189d786dca9ee43cb8cd3c 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1902,6 +1902,7 @@ config RUST depends on !MODVERSIONS depends on !GCC_PLUGINS depends on !RANDSTRUCT + depends on !SHADOW_CALL_STACK depends on !DEBUG_INFO_BTF || PAHOLE_HAS_LANG_EXCLUDE help Enables Rust support in the kernel. @@ -1919,7 +1920,7 @@ config RUST config RUSTC_VERSION_TEXT string depends on RUST - default $(shell,command -v $(RUSTC) >/dev/null 2>&1 && $(RUSTC) --version || echo n) + default "$(shell,$(RUSTC) --version 2>/dev/null)" config BINDGEN_VERSION_TEXT string @@ -1927,7 +1928,7 @@ config BINDGEN_VERSION_TEXT # The dummy parameter `workaround-for-0.69.0` is required to support 0.69.0 # (https://github.com/rust-lang/rust-bindgen/pull/2678). It can be removed when # the minimum version is upgraded past that (0.69.1 already fixed the issue). - default $(shell,command -v $(BINDGEN) >/dev/null 2>&1 && $(BINDGEN) --version workaround-for-0.69.0 || echo n) + default "$(shell,$(BINDGEN) --version workaround-for-0.69.0 2>/dev/null)" # # Place an empty function call at each tracepoint site. Can be diff --git a/io_uring/kbuf.c b/io_uring/kbuf.c index c95dc1736dd93019040230aca6126d7b8475dab8..1af2bd56af44ac496fd56f327e596730844ceb3f 100644 --- a/io_uring/kbuf.c +++ b/io_uring/kbuf.c @@ -218,10 +218,13 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg, buf = io_ring_head_to_buf(br, head, bl->mask); if (arg->max_len) { - int needed; + u32 len = READ_ONCE(buf->len); + size_t needed; - needed = (arg->max_len + buf->len - 1) / buf->len; - needed = min(needed, PEEK_MAX_IMPORT); + if (unlikely(!len)) + return -ENOBUFS; + needed = (arg->max_len + len - 1) / len; + needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT); if (nr_avail > needed) nr_avail = needed; } diff --git a/io_uring/napi.c b/io_uring/napi.c index 4fd6bb331e1ed9a3a607ef72b108cd56ae099cfc..1de1d4d629254c13af841d96c6019c0545ab53f8 100644 --- a/io_uring/napi.c +++ b/io_uring/napi.c @@ -26,7 +26,6 @@ static struct io_napi_entry *io_napi_hash_find(struct hlist_head *hash_list, hlist_for_each_entry_rcu(e, hash_list, node) { if (e->napi_id != napi_id) continue; - e->timeout = jiffies + NAPI_TIMEOUT; return e; } @@ -205,7 +204,6 @@ void io_napi_init(struct io_ring_ctx *ctx) void io_napi_free(struct io_ring_ctx *ctx) { struct io_napi_entry *e; - LIST_HEAD(napi_list); unsigned int i; spin_lock(&ctx->napi_lock); @@ -303,7 +301,7 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) { iowq->napi_prefer_busy_poll = READ_ONCE(ctx->napi_prefer_busy_poll); - if (!(ctx->flags & IORING_SETUP_SQPOLL) && ctx->napi_enabled) + if (!(ctx->flags & IORING_SETUP_SQPOLL)) io_napi_blocking_busy_loop(ctx, iowq); } @@ -315,7 +313,6 @@ void __io_napi_busy_loop(struct io_ring_ctx *ctx, struct io_wait_queue *iowq) */ int io_napi_sqpoll_busy_poll(struct io_ring_ctx *ctx) { - LIST_HEAD(napi_list); bool is_stale = false; if (!READ_ONCE(ctx->napi_busy_poll_dt)) diff --git a/io_uring/napi.h b/io_uring/napi.h index 88f1c21d5548ffd16f4975f91f9821fd01399f49..27b88c3eb42894eb5171592b03dd450471c7267d 100644 --- a/io_uring/napi.h +++ b/io_uring/napi.h @@ -55,7 +55,7 @@ static inline void io_napi_add(struct io_kiocb *req) struct io_ring_ctx *ctx = req->ctx; struct socket *sock; - if (!READ_ONCE(ctx->napi_busy_poll_dt)) + if (!READ_ONCE(ctx->napi_enabled)) return; sock = sock_from_file(req->file); diff --git a/io_uring/net.c b/io_uring/net.c index 594490a1389ba25e7ecc2b092298e4de70f247da..d08abcca89cc5b7d5c605f56c69e09f8d973f458 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -601,17 +601,18 @@ retry_bundle: .iovs = &kmsg->fast_iov, .max_len = INT_MAX, .nr_iovs = 1, - .mode = KBUF_MODE_EXPAND, }; if (kmsg->free_iov) { arg.nr_iovs = kmsg->free_iov_nr; arg.iovs = kmsg->free_iov; - arg.mode |= KBUF_MODE_FREE; + arg.mode = KBUF_MODE_FREE; } if (!(sr->flags & IORING_RECVSEND_BUNDLE)) arg.nr_iovs = 1; + else + arg.mode |= KBUF_MODE_EXPAND; ret = io_buffers_select(req, &arg, issue_flags); if (unlikely(ret < 0)) @@ -623,6 +624,7 @@ retry_bundle: if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } @@ -1094,6 +1096,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg if (arg.iovs != &kmsg->fast_iov && arg.iovs != kmsg->free_iov) { kmsg->free_iov_nr = ret; kmsg->free_iov = arg.iovs; + req->flags |= REQ_F_NEED_CLEANUP; } } else { void __user *buf; diff --git a/io_uring/poll.c b/io_uring/poll.c index 0a8e02944689faf2aac44ac74fd82118021a2a97..1f63b60e85e7c07a20e86441b5bd0ea8d7dcf016 100644 --- a/io_uring/poll.c +++ b/io_uring/poll.c @@ -347,6 +347,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts) v &= IO_POLL_REF_MASK; } while (atomic_sub_return(v, &req->poll_refs) & IO_POLL_REF_MASK); + io_napi_add(req); return IOU_POLL_NO_ACTION; } diff --git a/io_uring/sqpoll.c b/io_uring/sqpoll.c index b3722e5275e77e632529dedfb4285f3adfa07537..3b50dc9586d14fcb6a6b16e9fe29d029c86b15bc 100644 --- a/io_uring/sqpoll.c +++ b/io_uring/sqpoll.c @@ -44,7 +44,7 @@ void io_sq_thread_unpark(struct io_sq_data *sqd) void io_sq_thread_park(struct io_sq_data *sqd) __acquires(&sqd->lock) { - WARN_ON_ONCE(sqd->thread == current); + WARN_ON_ONCE(data_race(sqd->thread) == current); atomic_inc(&sqd->park_pending); set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state); diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 4cb5441ad75fc5e0bec630d4f527a21b722cb874..d8520095ca030e1b578a5f3d329a4536e1183516 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -16884,8 +16884,9 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, spi = i / BPF_REG_SIZE; if (exact != NOT_EXACT && - old->stack[spi].slot_type[i % BPF_REG_SIZE] != - cur->stack[spi].slot_type[i % BPF_REG_SIZE]) + (i >= cur->allocated_stack || + old->stack[spi].slot_type[i % BPF_REG_SIZE] != + cur->stack[spi].slot_type[i % BPF_REG_SIZE])) return false; if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 40ec4abaf4408f69725ea518f323b10662844ba3..4bd9e50bcc8eefd72b2b817415b442a86a421df7 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -232,6 +232,13 @@ static cpumask_var_t isolated_cpus; /* List of remote partition root children */ static struct list_head remote_children; +/* + * A flag to force sched domain rebuild at the end of an operation while + * inhibiting it in the intermediate stages when set. Currently it is only + * set in hotplug code. + */ +static bool force_sd_rebuild; + /* * Partition root states: * @@ -1475,7 +1482,7 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); } - if (rebuild_domains) + if (rebuild_domains && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -1833,7 +1840,7 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, remote_partition_disable(child, tmp); disable_cnt++; } - if (disable_cnt) + if (disable_cnt && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -1991,6 +1998,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_CPUSEMPTY; goto write_error; } + /* Check newmask again, whether cpus are available for parent/cs */ + nocpu |= tasks_nocpu_error(parent, cs, newmask); /* * partcmd_update with newmask: @@ -2440,7 +2449,8 @@ get_css: } rcu_read_unlock(); - if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD)) + if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) && + !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -2523,7 +2533,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, */ if (!*buf) { cpumask_clear(trialcs->cpus_allowed); - cpumask_clear(trialcs->effective_xcpus); + if (cpumask_empty(trialcs->exclusive_cpus)) + cpumask_clear(trialcs->effective_xcpus); } else { retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) @@ -3101,7 +3112,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, cs->flags = trialcs->flags; spin_unlock_irq(&callback_lock); - if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) + if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed && + !force_sd_rebuild) rebuild_sched_domains_locked(); if (spread_flag_changed) @@ -4498,11 +4510,9 @@ hotplug_update_tasks(struct cpuset *cs, update_tasks_nodemask(cs); } -static bool force_rebuild; - void cpuset_force_rebuild(void) { - force_rebuild = true; + force_sd_rebuild = true; } /** @@ -4650,15 +4660,9 @@ static void cpuset_handle_hotplug(void) !cpumask_empty(subpartitions_cpus); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); - /* - * In the rare case that hotplug removes all the cpus in - * subpartitions_cpus, we assumed that cpus are updated. - */ - if (!cpus_updated && !cpumask_empty(subpartitions_cpus)) - cpus_updated = true; - /* For v1, synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { + cpuset_force_rebuild(); spin_lock_irq(&callback_lock); if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); @@ -4714,8 +4718,8 @@ static void cpuset_handle_hotplug(void) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated || force_rebuild) { - force_rebuild = false; + if (force_sd_rebuild) { + force_sd_rebuild = false; rebuild_sched_domains_cpuslocked(); } diff --git a/kernel/cpu.c b/kernel/cpu.c index 1209ddaec026da7f8f27dcde18b526048ddc87a7..b1fd2a3db91a2ee71bc645882d09d09093459dd6 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -2689,6 +2689,16 @@ int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) return ret; } +/** + * Check if the core a CPU belongs to is online + */ +#if !defined(topology_is_core_online) +static inline bool topology_is_core_online(unsigned int cpu) +{ + return true; +} +#endif + int cpuhp_smt_enable(void) { int cpu, ret = 0; @@ -2699,7 +2709,7 @@ int cpuhp_smt_enable(void) /* Skip online CPUs and CPUs on offline nodes */ if (cpu_online(cpu) || !node_online(cpu_to_node(cpu))) continue; - if (!cpu_smt_thread_allowed(cpu)) + if (!cpu_smt_thread_allowed(cpu) || !topology_is_core_online(cpu)) continue; ret = _cpu_up(cpu, 0, CPUHP_ONLINE); if (ret) diff --git a/kernel/crash_reserve.c b/kernel/crash_reserve.c index d3b4cd12bdd134c1c5fbc676d3fc7dc2e4c4c3d3..64d44a52c011474960833f0a864b15d58550bfe8 100644 --- a/kernel/crash_reserve.c +++ b/kernel/crash_reserve.c @@ -423,7 +423,8 @@ retry: if (high && search_end == CRASH_ADDR_HIGH_MAX) { search_end = CRASH_ADDR_LOW_MAX; search_base = 0; - goto retry; + if (search_end != CRASH_ADDR_HIGH_MAX) + goto retry; } pr_warn("cannot allocate crashkernel (size:0x%llx)\n", crash_size); diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index a6e3792b15f8a5ce0f870901de50b53a356df829..d570535342cb78c967e3a7c6e0cc2204e109ba0c 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -416,8 +416,11 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry) * dma_active_cacheline entry to track per event. dma_map_sg(), on the * other hand, consumes a single dma_debug_entry, but inserts 'nents' * entries into the tree. + * + * Use __GFP_NOWARN because the printk from an OOM, to netconsole, could end + * up right back in the DMA debugging code, leading to a deadlock. */ -static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC); +static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC | __GFP_NOWARN); static DEFINE_SPINLOCK(radix_lock); #define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) #define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) diff --git a/kernel/events/core.c b/kernel/events/core.c index c6a720f412258079c9fca17f246444bcad9f823a..4acec97e5448b427d104b7d615004e30b948b84e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1286,8 +1286,9 @@ static void put_ctx(struct perf_event_context *ctx) * perf_event_context::mutex * perf_event::child_mutex; * perf_event_context::lock - * perf_event::mmap_mutex * mmap_lock + * perf_event::mmap_mutex + * perf_buffer::aux_mutex * perf_addr_filters_head::lock * * cpu_hotplug_lock @@ -6458,12 +6459,11 @@ static void perf_mmap_close(struct vm_area_struct *vma) event->pmu->event_unmapped(event, vma->vm_mm); /* - * rb->aux_mmap_count will always drop before rb->mmap_count and - * event->mmap_count, so it is ok to use event->mmap_mutex to - * serialize with perf_mmap here. + * The AUX buffer is strictly a sub-buffer, serialize using aux_mutex + * to avoid complications. */ if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff && - atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) { + atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &rb->aux_mutex)) { /* * Stop all AUX events that are writing to this buffer, * so that we can free its AUX pages and corresponding PMU @@ -6480,7 +6480,7 @@ static void perf_mmap_close(struct vm_area_struct *vma) rb_free_aux(rb); WARN_ON_ONCE(refcount_read(&rb->aux_refcount)); - mutex_unlock(&event->mmap_mutex); + mutex_unlock(&rb->aux_mutex); } if (atomic_dec_and_test(&rb->mmap_count)) @@ -6568,6 +6568,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) struct perf_event *event = file->private_data; unsigned long user_locked, user_lock_limit; struct user_struct *user = current_user(); + struct mutex *aux_mutex = NULL; struct perf_buffer *rb = NULL; unsigned long locked, lock_limit; unsigned long vma_size; @@ -6616,6 +6617,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma) if (!rb) goto aux_unlock; + aux_mutex = &rb->aux_mutex; + mutex_lock(aux_mutex); + aux_offset = READ_ONCE(rb->user_page->aux_offset); aux_size = READ_ONCE(rb->user_page->aux_size); @@ -6766,6 +6770,8 @@ unlock: atomic_dec(&rb->mmap_count); } aux_unlock: + if (aux_mutex) + mutex_unlock(aux_mutex); mutex_unlock(&event->mmap_mutex); /* @@ -9796,7 +9802,8 @@ static int __perf_event_overflow(struct perf_event *event, ret = __perf_event_account_interrupt(event, throttle); - if (event->prog && !bpf_overflow_handler(event, data, regs)) + if (event->prog && event->prog->type == BPF_PROG_TYPE_PERF_EVENT && + !bpf_overflow_handler(event, data, regs)) return ret; /* diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 451514442a1b88e83c60bdc7b1da8606bcc8f11f..e072d995d670f76ff29835101d83a6a3cf412bca 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,6 +40,7 @@ struct perf_buffer { struct user_struct *mmap_user; /* AUX area */ + struct mutex aux_mutex; long aux_head; unsigned int aux_nest; long aux_wakeup; /* last aux_watermark boundary crossed by aux_head */ diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 8cadf97bc2901aaf4f9bb6d6c9149dc90c035360..4f46f688d0d4909d5a307fdc0359b4c0a0d65955 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -337,6 +337,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags) */ if (!rb->nr_pages) rb->paused = 1; + + mutex_init(&rb->aux_mutex); } void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 30348f13d4a72794b820342a3acf5afdf2db3048..cac45ea4c2849ca0a74e1626565bb2289b9699e7 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1470,7 +1470,7 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) struct xol_area *area; void *insns; - area = kmalloc(sizeof(*area), GFP_KERNEL); + area = kzalloc(sizeof(*area), GFP_KERNEL); if (unlikely(!area)) goto out; @@ -1480,7 +1480,6 @@ static struct xol_area *__create_xol_area(unsigned long vaddr) goto free_area; area->xol_mapping.name = "[uprobes]"; - area->xol_mapping.fault = NULL; area->xol_mapping.pages = area->pages; area->pages[0] = alloc_page(GFP_HIGHUSER); if (!area->pages[0]) diff --git a/kernel/fork.c b/kernel/fork.c index cc760491f20127f44ab5db96ec7e858dcce77f54..18bdc87209d0574cfca1831a7979b240a6f57eda 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2053,11 +2053,24 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - bool thread = flags & PIDFD_THREAD; - - if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) + if (!pid) return -EINVAL; + scoped_guard(rcu) { + struct task_struct *tsk; + + if (flags & PIDFD_THREAD) + tsk = pid_task(pid, PIDTYPE_PID); + else + tsk = pid_task(pid, PIDTYPE_TGID); + if (!tsk) + return -EINVAL; + + /* Don't create pidfds for kernel threads for now. */ + if (tsk->flags & PF_KTHREAD) + return -EINVAL; + } + return __pidfd_prepare(pid, flags, ret); } @@ -2403,6 +2416,12 @@ __latent_entropy struct task_struct *copy_process( if (clone_flags & CLONE_PIDFD) { int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; + /* Don't create pidfds for kernel threads for now. */ + if (args->kthread) { + retval = -EINVAL; + goto bad_fork_free_pid; + } + /* Note that no task has been attached to @pid yet. */ retval = __pidfd_prepare(pid, flags, &pidfile); if (retval < 0) diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 07e99c936ba5d86f8f3a291f6b7d95abcd32f03a..1dee88ba0ae449a7c83d5cfa8fe6b69f83b3b6d6 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -530,6 +530,7 @@ static int alloc_descs(unsigned int start, unsigned int cnt, int node, flags = IRQD_AFFINITY_MANAGED | IRQD_MANAGED_SHUTDOWN; } + flags |= IRQD_AFFINITY_SET; mask = &affinity->mask; node = cpu_to_node(cpumask_first(mask)); affinity++; diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 4ad5ed8adf96910395d2e62e8e23c78af815b1f6..6dc76b590703edd263f8e88c9f15da09f4b66aa7 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -236,7 +236,7 @@ void static_key_disable_cpuslocked(struct static_key *key) } jump_label_lock(); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); jump_label_unlock(); } @@ -289,7 +289,7 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key) return; guard(mutex)(&jump_label_mutex); - if (atomic_cmpxchg(&key->enabled, 1, 0)) + if (atomic_cmpxchg(&key->enabled, 1, 0) == 1) jump_label_update(key); else WARN_ON_ONCE(!static_key_slow_try_dec(key)); diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fb2c77368d187eda2a3a9083f3cb4c11f2b78ebb..a9a0ca605d4a8c82c5575706443287120ee6edc4 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -160,38 +160,6 @@ unsigned long kallsyms_sym_address(int idx) return kallsyms_relative_base - 1 - kallsyms_offsets[idx]; } -static void cleanup_symbol_name(char *s) -{ - char *res; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return; - - /* - * LLVM appends various suffixes for local functions and variables that - * must be promoted to global scope as part of LTO. This can break - * hooking of static functions with kprobes. '.' is not a valid - * character in an identifier in C. Suffixes only in LLVM LTO observed: - * - foo.llvm.[0-9a-f]+ - */ - res = strstr(s, ".llvm."); - if (res) - *res = '\0'; - - return; -} - -static int compare_symbol_name(const char *name, char *namebuf) -{ - /* The kallsyms_seqs_of_names is sorted based on names after - * cleanup_symbol_name() (see scripts/kallsyms.c) if clang lto is enabled. - * To ensure correct bisection in kallsyms_lookup_names(), do - * cleanup_symbol_name(namebuf) before comparing name and namebuf. - */ - cleanup_symbol_name(namebuf); - return strcmp(name, namebuf); -} - static unsigned int get_symbol_seq(int index) { unsigned int i, seq = 0; @@ -219,7 +187,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(mid); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - ret = compare_symbol_name(name, namebuf); + ret = strcmp(name, namebuf); if (ret > 0) low = mid + 1; else if (ret < 0) @@ -236,7 +204,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(low - 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; low--; } @@ -248,7 +216,7 @@ static int kallsyms_lookup_names(const char *name, seq = get_symbol_seq(high + 1); off = get_symbol_offset(seq); kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf)); - if (compare_symbol_name(name, namebuf)) + if (strcmp(name, namebuf)) break; high++; } @@ -407,8 +375,7 @@ static int kallsyms_lookup_buildid(unsigned long addr, if (modbuildid) *modbuildid = NULL; - ret = strlen(namebuf); - goto found; + return strlen(namebuf); } /* See if it's in a module or a BPF JITed image. */ @@ -422,8 +389,6 @@ static int kallsyms_lookup_buildid(unsigned long addr, ret = ftrace_mod_address_lookup(addr, symbolsize, offset, modname, namebuf); -found: - cleanup_symbol_name(namebuf); return ret; } @@ -450,8 +415,6 @@ const char *kallsyms_lookup(unsigned long addr, int lookup_symbol_name(unsigned long addr, char *symname) { - int res; - symname[0] = '\0'; symname[KSYM_NAME_LEN - 1] = '\0'; @@ -462,16 +425,10 @@ int lookup_symbol_name(unsigned long addr, char *symname) /* Grab name */ kallsyms_expand_symbol(get_symbol_offset(pos), symname, KSYM_NAME_LEN); - goto found; + return 0; } /* See if it's in a module. */ - res = lookup_module_symbol_name(addr, symname); - if (res) - return res; - -found: - cleanup_symbol_name(symname); - return 0; + return lookup_module_symbol_name(addr, symname); } /* Look up a kernel symbol and return it in a text buffer. */ diff --git a/kernel/kallsyms_selftest.c b/kernel/kallsyms_selftest.c index 2f84896a7bcbdf873be7fa48d05eabe58334bb1a..873f7c445488c17390cff0c21d308aeb8840c27e 100644 --- a/kernel/kallsyms_selftest.c +++ b/kernel/kallsyms_selftest.c @@ -187,31 +187,11 @@ static void test_perf_kallsyms_lookup_name(void) stat.min, stat.max, div_u64(stat.sum, stat.real_cnt)); } -static bool match_cleanup_name(const char *s, const char *name) -{ - char *p; - int len; - - if (!IS_ENABLED(CONFIG_LTO_CLANG)) - return false; - - p = strstr(s, ".llvm."); - if (!p) - return false; - - len = strlen(name); - if (p - s != len) - return false; - - return !strncmp(s, name, len); -} - static int find_symbol(void *data, const char *name, unsigned long addr) { struct test_stat *stat = (struct test_stat *)data; - if (strcmp(name, stat->name) == 0 || - (!stat->perf && match_cleanup_name(name, stat->name))) { + if (!strcmp(name, stat->name)) { stat->real_cnt++; stat->addr = addr; diff --git a/kernel/kcov.c b/kernel/kcov.c index f0a69d402066e12cdbf99f589ac6bbca02896978..274b6b7c718de6ba3eb90d3593ac494b30b25688 100644 --- a/kernel/kcov.c +++ b/kernel/kcov.c @@ -161,6 +161,15 @@ static void kcov_remote_area_put(struct kcov_remote_area *area, kmsan_unpoison_memory(&area->list, sizeof(area->list)); } +/* + * Unlike in_serving_softirq(), this function returns false when called during + * a hardirq or an NMI that happened in the softirq context. + */ +static inline bool in_softirq_really(void) +{ + return in_serving_softirq() && !in_hardirq() && !in_nmi(); +} + static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_struct *t) { unsigned int mode; @@ -170,7 +179,7 @@ static notrace bool check_kcov_mode(enum kcov_mode needed_mode, struct task_stru * so we ignore code executed in interrupts, unless we are in a remote * coverage collection section in a softirq. */ - if (!in_task() && !(in_serving_softirq() && t->kcov_softirq)) + if (!in_task() && !(in_softirq_really() && t->kcov_softirq)) return false; mode = READ_ONCE(t->kcov_mode); /* @@ -849,7 +858,7 @@ void kcov_remote_start(u64 handle) if (WARN_ON(!kcov_check_handle(handle, true, true, true))) return; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); @@ -991,7 +1000,7 @@ void kcov_remote_stop(void) int sequence; unsigned long flags; - if (!in_task() && !in_serving_softirq()) + if (!in_task() && !in_softirq_really()) return; local_lock_irqsave(&kcov_percpu_data.lock, flags); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index e85de37d9e1e01b2dde77c8e63cc4e18673411f6..da59c68df8412c4662d39d3e286d516e8cee9a69 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1557,8 +1557,8 @@ static bool is_cfi_preamble_symbol(unsigned long addr) if (lookup_symbol_name(addr, symbuf)) return false; - return str_has_prefix("__cfi_", symbuf) || - str_has_prefix("__pfx_", symbuf); + return str_has_prefix(symbuf, "__cfi_") || + str_has_prefix(symbuf, "__pfx_"); } static int check_kprobe_address_safe(struct kprobe *p, diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c index 07fb5987b42bdfeb52a66a87954de0c7f0bc0c40..1bab21b4718fffd9fa1c76d46827f44af870dd9d 100644 --- a/kernel/ksysfs.c +++ b/kernel/ksysfs.c @@ -92,7 +92,14 @@ static ssize_t profiling_store(struct kobject *kobj, const char *buf, size_t count) { int ret; + static DEFINE_MUTEX(lock); + /* + * We need serialization, for profile_setup() initializes prof_on + * value and profile_init() must not reallocate prof_buffer after + * once allocated. + */ + guard(mutex)(&lock); if (prof_on) return -EEXIST; /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 58c88220a478ac59a00dc83be03ed260346559e6..0349f957e672da9ede0c3e6ff24892dffa8118e2 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5936,6 +5936,9 @@ __lock_contended(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, ip); @@ -5978,6 +5981,9 @@ __lock_acquired(struct lockdep_map *lock, unsigned long ip) if (DEBUG_LOCKS_WARN_ON(!depth)) return; + if (unlikely(lock->key == &__lockdep_no_track__)) + return; + hlock = find_held_lock(curr, lock, depth, &i); if (!hlock) { print_lock_contention_bug(curr, lock, _RET_IP_); diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index f5a36e67b59355a21a3f4befaf2248236286ff92..ac2e2250274105b118911958f9bcc8e703e33589 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -357,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - enum vcpu_state old = vcpu_halted; + u8 old = vcpu_halted; /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will diff --git a/kernel/module/main.c b/kernel/module/main.c index d9592195c5bbb4261ab6993a4ad56e7513f6331d..71396e297499f47f9871dbac48ccc5a79f8db65a 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -3104,7 +3104,7 @@ static bool idempotent(struct idempotent *u, const void *cookie) struct idempotent *existing; bool first; - u->ret = 0; + u->ret = -EINTR; u->cookie = cookie; init_completion(&u->complete); @@ -3140,7 +3140,7 @@ static int idempotent_complete(struct idempotent *u, int ret) hlist_for_each_entry_safe(pos, next, head, entry) { if (pos->cookie != cookie) continue; - hlist_del(&pos->entry); + hlist_del_init(&pos->entry); pos->ret = ret; complete(&pos->complete); } @@ -3148,6 +3148,28 @@ static int idempotent_complete(struct idempotent *u, int ret) return ret; } +/* + * Wait for the idempotent worker. + * + * If we get interrupted, we need to remove ourselves from the + * the idempotent list, and the completion may still come in. + * + * The 'idem_lock' protects against the race, and 'idem.ret' was + * initialized to -EINTR and is thus always the right return + * value even if the idempotent work then completes between + * the wait_for_completion and the cleanup. + */ +static int idempotent_wait_for_completion(struct idempotent *u) +{ + if (wait_for_completion_interruptible(&u->complete)) { + spin_lock(&idem_lock); + if (!hlist_unhashed(&u->entry)) + hlist_del(&u->entry); + spin_unlock(&idem_lock); + } + return u->ret; +} + static int init_module_from_file(struct file *f, const char __user * uargs, int flags) { struct load_info info = { }; @@ -3183,15 +3205,16 @@ static int idempotent_init_module(struct file *f, const char __user * uargs, int if (!f || !(f->f_mode & FMODE_READ)) return -EBADF; - /* See if somebody else is doing the operation? */ - if (idempotent(&idem, file_inode(f))) { - wait_for_completion(&idem.complete); - return idem.ret; + /* Are we the winners of the race and get to do this? */ + if (!idempotent(&idem, file_inode(f))) { + int ret = init_module_from_file(f, uargs, flags); + return idempotent_complete(&idem, ret); } - /* Otherwise, we'll do it and complete others */ - return idempotent_complete(&idem, - init_module_from_file(f, uargs, flags)); + /* + * Somebody else won the race and is loading the module. + */ + return idempotent_wait_for_completion(&idem); } SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) diff --git a/kernel/padata.c b/kernel/padata.c index 53f4bc9127127c287bad71da7d870cbb42c51d6a..0fa6c289546032727595a1ccbdc2e48d3ceb342d 100644 --- a/kernel/padata.c +++ b/kernel/padata.c @@ -517,6 +517,13 @@ void __init padata_do_multithreaded(struct padata_mt_job *job) ps.chunk_size = max(ps.chunk_size, job->min_chunk); ps.chunk_size = roundup(ps.chunk_size, job->align); + /* + * chunk_size can be 0 if the caller sets min_chunk to 0. So force it + * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().` + */ + if (!ps.chunk_size) + ps.chunk_size = 1U; + list_for_each_entry(pw, &works, pw_list) if (job->numa_aware) { int old_node = atomic_read(&last_used_nid); diff --git a/kernel/panic.c b/kernel/panic.c index f861bedc1925e748e6589b0196388784ceba302d..2a0449144f82ea8c3daef723b56f63a22e128657 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -64,6 +64,8 @@ unsigned long panic_on_taint; bool panic_on_taint_nousertaint = false; static unsigned int warn_limit __read_mostly; +bool panic_triggering_all_cpu_backtrace; + int panic_timeout = CONFIG_PANIC_TIMEOUT; EXPORT_SYMBOL_GPL(panic_timeout); @@ -253,8 +255,12 @@ void check_panic_on_warn(const char *origin) */ static void panic_other_cpus_shutdown(bool crash_kexec) { - if (panic_print & PANIC_PRINT_ALL_CPU_BT) + if (panic_print & PANIC_PRINT_ALL_CPU_BT) { + /* Temporary allow non-panic CPUs to write their backtraces. */ + panic_triggering_all_cpu_backtrace = true; trigger_all_cpu_backtrace(); + panic_triggering_all_cpu_backtrace = false; + } /* * Note that smp_send_stop() is the usual SMP shutdown function, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 054c0e7784fdfdcb1b3f11a67ccd82289eec810a..c22b07049c382c2174416adfd3aab9afab5a85a9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2316,7 +2316,7 @@ asmlinkage int vprintk_emit(int facility, int level, * non-panic CPUs are generating any messages, they will be * silently dropped. */ - if (other_cpu_in_panic()) + if (other_cpu_in_panic() && !panic_triggering_all_cpu_backtrace) return 0; if (level == LOGLEVEL_SCHED) { diff --git a/kernel/profile.c b/kernel/profile.c index 2b775cc5c28f96cebe33e6d9c55a001d0debd6ce..1fcf1adcf4eb033ef0a5e2960c63404700b94519 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -47,30 +47,14 @@ static unsigned short int prof_shift; int prof_on __read_mostly; EXPORT_SYMBOL_GPL(prof_on); -static cpumask_var_t prof_cpu_mask; -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -static DEFINE_PER_CPU(struct profile_hit *[2], cpu_profile_hits); -static DEFINE_PER_CPU(int, cpu_profile_flip); -static DEFINE_MUTEX(profile_flip_mutex); -#endif /* CONFIG_SMP */ - int profile_setup(char *str) { static const char schedstr[] = "schedule"; - static const char sleepstr[] = "sleep"; static const char kvmstr[] = "kvm"; const char *select = NULL; int par; - if (!strncmp(str, sleepstr, strlen(sleepstr))) { -#ifdef CONFIG_SCHEDSTATS - force_schedstat_enabled(); - prof_on = SLEEP_PROFILING; - select = sleepstr; -#else - pr_warn("kernel sleep profiling requires CONFIG_SCHEDSTATS\n"); -#endif /* CONFIG_SCHEDSTATS */ - } else if (!strncmp(str, schedstr, strlen(schedstr))) { + if (!strncmp(str, schedstr, strlen(schedstr))) { prof_on = SCHED_PROFILING; select = schedstr; } else if (!strncmp(str, kvmstr, strlen(kvmstr))) { @@ -114,11 +98,6 @@ int __ref profile_init(void) buffer_bytes = prof_len*sizeof(atomic_t); - if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL)) - return -ENOMEM; - - cpumask_copy(prof_cpu_mask, cpu_possible_mask); - prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL|__GFP_NOWARN); if (prof_buffer) return 0; @@ -132,195 +111,16 @@ int __ref profile_init(void) if (prof_buffer) return 0; - free_cpumask_var(prof_cpu_mask); return -ENOMEM; } -#if defined(CONFIG_SMP) && defined(CONFIG_PROC_FS) -/* - * Each cpu has a pair of open-addressed hashtables for pending - * profile hits. read_profile() IPI's all cpus to request them - * to flip buffers and flushes their contents to prof_buffer itself. - * Flip requests are serialized by the profile_flip_mutex. The sole - * use of having a second hashtable is for avoiding cacheline - * contention that would otherwise happen during flushes of pending - * profile hits required for the accuracy of reported profile hits - * and so resurrect the interrupt livelock issue. - * - * The open-addressed hashtables are indexed by profile buffer slot - * and hold the number of pending hits to that profile buffer slot on - * a cpu in an entry. When the hashtable overflows, all pending hits - * are accounted to their corresponding profile buffer slots with - * atomic_add() and the hashtable emptied. As numerous pending hits - * may be accounted to a profile buffer slot in a hashtable entry, - * this amortizes a number of atomic profile buffer increments likely - * to be far larger than the number of entries in the hashtable, - * particularly given that the number of distinct profile buffer - * positions to which hits are accounted during short intervals (e.g. - * several seconds) is usually very small. Exclusion from buffer - * flipping is provided by interrupt disablement (note that for - * SCHED_PROFILING or SLEEP_PROFILING profile_hit() may be called from - * process context). - * The hash function is meant to be lightweight as opposed to strong, - * and was vaguely inspired by ppc64 firmware-supported inverted - * pagetable hash functions, but uses a full hashtable full of finite - * collision chains, not just pairs of them. - * - * -- nyc - */ -static void __profile_flip_buffers(void *unused) -{ - int cpu = smp_processor_id(); - - per_cpu(cpu_profile_flip, cpu) = !per_cpu(cpu_profile_flip, cpu); -} - -static void profile_flip_buffers(void) -{ - int i, j, cpu; - - mutex_lock(&profile_flip_mutex); - j = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[j]; - for (i = 0; i < NR_PROFILE_HIT; ++i) { - if (!hits[i].hits) { - if (hits[i].pc) - hits[i].pc = 0; - continue; - } - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].hits = hits[i].pc = 0; - } - } - mutex_unlock(&profile_flip_mutex); -} - -static void profile_discard_flip_buffers(void) -{ - int i, cpu; - - mutex_lock(&profile_flip_mutex); - i = per_cpu(cpu_profile_flip, get_cpu()); - put_cpu(); - on_each_cpu(__profile_flip_buffers, NULL, 1); - for_each_online_cpu(cpu) { - struct profile_hit *hits = per_cpu(cpu_profile_hits, cpu)[i]; - memset(hits, 0, NR_PROFILE_HIT*sizeof(struct profile_hit)); - } - mutex_unlock(&profile_flip_mutex); -} - -static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) -{ - unsigned long primary, secondary, flags, pc = (unsigned long)__pc; - int i, j, cpu; - struct profile_hit *hits; - - pc = min((pc - (unsigned long)_stext) >> prof_shift, prof_len - 1); - i = primary = (pc & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - secondary = (~(pc << 1) & (NR_PROFILE_GRP - 1)) << PROFILE_GRPSHIFT; - cpu = get_cpu(); - hits = per_cpu(cpu_profile_hits, cpu)[per_cpu(cpu_profile_flip, cpu)]; - if (!hits) { - put_cpu(); - return; - } - /* - * We buffer the global profiler buffer into a per-CPU - * queue and thus reduce the number of global (and possibly - * NUMA-alien) accesses. The write-queue is self-coalescing: - */ - local_irq_save(flags); - do { - for (j = 0; j < PROFILE_GRPSZ; ++j) { - if (hits[i + j].pc == pc) { - hits[i + j].hits += nr_hits; - goto out; - } else if (!hits[i + j].hits) { - hits[i + j].pc = pc; - hits[i + j].hits = nr_hits; - goto out; - } - } - i = (i + secondary) & (NR_PROFILE_HIT - 1); - } while (i != primary); - - /* - * Add the current hit(s) and flush the write-queue out - * to the global buffer: - */ - atomic_add(nr_hits, &prof_buffer[pc]); - for (i = 0; i < NR_PROFILE_HIT; ++i) { - atomic_add(hits[i].hits, &prof_buffer[hits[i].pc]); - hits[i].pc = hits[i].hits = 0; - } -out: - local_irq_restore(flags); - put_cpu(); -} - -static int profile_dead_cpu(unsigned int cpu) -{ - struct page *page; - int i; - - if (cpumask_available(prof_cpu_mask)) - cpumask_clear_cpu(cpu, prof_cpu_mask); - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) { - page = virt_to_page(per_cpu(cpu_profile_hits, cpu)[i]); - per_cpu(cpu_profile_hits, cpu)[i] = NULL; - __free_page(page); - } - } - return 0; -} - -static int profile_prepare_cpu(unsigned int cpu) -{ - int i, node = cpu_to_mem(cpu); - struct page *page; - - per_cpu(cpu_profile_flip, cpu) = 0; - - for (i = 0; i < 2; i++) { - if (per_cpu(cpu_profile_hits, cpu)[i]) - continue; - - page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); - if (!page) { - profile_dead_cpu(cpu); - return -ENOMEM; - } - per_cpu(cpu_profile_hits, cpu)[i] = page_address(page); - - } - return 0; -} - -static int profile_online_cpu(unsigned int cpu) -{ - if (cpumask_available(prof_cpu_mask)) - cpumask_set_cpu(cpu, prof_cpu_mask); - - return 0; -} - -#else /* !CONFIG_SMP */ -#define profile_flip_buffers() do { } while (0) -#define profile_discard_flip_buffers() do { } while (0) - static void do_profile_hits(int type, void *__pc, unsigned int nr_hits) { unsigned long pc; pc = ((unsigned long)__pc - (unsigned long)_stext) >> prof_shift; - atomic_add(nr_hits, &prof_buffer[min(pc, prof_len - 1)]); + if (pc < prof_len) + atomic_add(nr_hits, &prof_buffer[pc]); } -#endif /* !CONFIG_SMP */ void profile_hits(int type, void *__pc, unsigned int nr_hits) { @@ -334,8 +134,8 @@ void profile_tick(int type) { struct pt_regs *regs = get_irq_regs(); - if (!user_mode(regs) && cpumask_available(prof_cpu_mask) && - cpumask_test_cpu(smp_processor_id(), prof_cpu_mask)) + /* This is the old kernel-only legacy profiling */ + if (!user_mode(regs)) profile_hit(type, (void *)profile_pc(regs)); } @@ -358,7 +158,6 @@ read_profile(struct file *file, char __user *buf, size_t count, loff_t *ppos) char *pnt; unsigned long sample_step = 1UL << prof_shift; - profile_flip_buffers(); if (p >= (prof_len+1)*sizeof(unsigned int)) return 0; if (count > (prof_len+1)*sizeof(unsigned int) - p) @@ -404,7 +203,6 @@ static ssize_t write_profile(struct file *file, const char __user *buf, return -EINVAL; } #endif - profile_discard_flip_buffers(); memset(prof_buffer, 0, prof_len * sizeof(atomic_t)); return count; } @@ -418,40 +216,14 @@ static const struct proc_ops profile_proc_ops = { int __ref create_proc_profile(void) { struct proc_dir_entry *entry; -#ifdef CONFIG_SMP - enum cpuhp_state online_state; -#endif - int err = 0; if (!prof_on) return 0; -#ifdef CONFIG_SMP - err = cpuhp_setup_state(CPUHP_PROFILE_PREPARE, "PROFILE_PREPARE", - profile_prepare_cpu, profile_dead_cpu); - if (err) - return err; - - err = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "AP_PROFILE_ONLINE", - profile_online_cpu, NULL); - if (err < 0) - goto err_state_prep; - online_state = err; - err = 0; -#endif entry = proc_create("profile", S_IWUSR | S_IRUGO, NULL, &profile_proc_ops); - if (!entry) - goto err_state_onl; - proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); - - return err; -err_state_onl: -#ifdef CONFIG_SMP - cpuhp_remove_state(online_state); -err_state_prep: - cpuhp_remove_state(CPUHP_PROFILE_PREPARE); -#endif + if (entry) + proc_set_size(entry, (1 + prof_len) * sizeof(atomic_t)); return err; } subsys_initcall(create_proc_profile); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a9f655025607b977a790adbc7f33b1265fce6636..f3951e4a55e5b6078142f2f2b44359b7560e6e4f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7845,6 +7845,30 @@ void set_rq_offline(struct rq *rq) } } +static inline void sched_set_rq_online(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_online(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + +static inline void sched_set_rq_offline(struct rq *rq, int cpu) +{ + struct rq_flags rf; + + rq_lock_irqsave(rq, &rf); + if (rq->rd) { + BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); + set_rq_offline(rq); + } + rq_unlock_irqrestore(rq, &rf); +} + /* * used to mark begin/end of suspend/resume: */ @@ -7895,10 +7919,25 @@ static int cpuset_cpu_inactive(unsigned int cpu) return 0; } +static inline void sched_smt_present_inc(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_inc_cpuslocked(&sched_smt_present); +#endif +} + +static inline void sched_smt_present_dec(int cpu) +{ +#ifdef CONFIG_SCHED_SMT + if (cpumask_weight(cpu_smt_mask(cpu)) == 2) + static_branch_dec_cpuslocked(&sched_smt_present); +#endif +} + int sched_cpu_activate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; /* * Clear the balance_push callback and prepare to schedule @@ -7906,13 +7945,10 @@ int sched_cpu_activate(unsigned int cpu) */ balance_push_set(cpu, false); -#ifdef CONFIG_SCHED_SMT /* * When going up, increment the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_inc_cpuslocked(&sched_smt_present); -#endif + sched_smt_present_inc(cpu); set_cpu_active(cpu, true); if (sched_smp_initialized) { @@ -7930,12 +7966,7 @@ int sched_cpu_activate(unsigned int cpu) * 2) At runtime, if cpuset_cpu_active() fails to rebuild the * domains. */ - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_online(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_online(rq, cpu); return 0; } @@ -7943,7 +7974,6 @@ int sched_cpu_activate(unsigned int cpu) int sched_cpu_deactivate(unsigned int cpu) { struct rq *rq = cpu_rq(cpu); - struct rq_flags rf; int ret; /* @@ -7974,20 +8004,14 @@ int sched_cpu_deactivate(unsigned int cpu) */ synchronize_rcu(); - rq_lock_irqsave(rq, &rf); - if (rq->rd) { - BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span)); - set_rq_offline(rq); - } - rq_unlock_irqrestore(rq, &rf); + sched_set_rq_offline(rq, cpu); -#ifdef CONFIG_SCHED_SMT /* * When going down, decrement the number of cores with SMT present. */ - if (cpumask_weight(cpu_smt_mask(cpu)) == 2) - static_branch_dec_cpuslocked(&sched_smt_present); + sched_smt_present_dec(cpu); +#ifdef CONFIG_SCHED_SMT sched_core_cpu_deactivate(cpu); #endif @@ -7997,6 +8021,8 @@ int sched_cpu_deactivate(unsigned int cpu) sched_update_numa(cpu, false); ret = cpuset_cpu_inactive(cpu); if (ret) { + sched_smt_present_inc(cpu); + sched_set_rq_online(rq, cpu); balance_push_set(cpu, false); set_cpu_active(cpu, true); sched_update_numa(cpu, true); diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a5e00293ae433077920ea7cc0b3a73c7e59f5dc5..0bed0fa1acd988a0b29aed21fa9f77e908795029 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -582,6 +582,12 @@ void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, } stime = mul_u64_u64_div_u64(stime, rtime, stime + utime); + /* + * Because mul_u64_u64_div_u64() can approximate on some + * achitectures; enforce the constraint that: a*b/(b+c) <= a. + */ + if (unlikely(stime > rtime)) + stime = rtime; update: /* diff --git a/kernel/sched/stats.c b/kernel/sched/stats.c index 78e48f5426ee1f8157e32381d32b79cfd5990373..eb0cdcd4d9212e3a5ce1696f4257f93c66dacfb2 100644 --- a/kernel/sched/stats.c +++ b/kernel/sched/stats.c @@ -92,16 +92,6 @@ void __update_stats_enqueue_sleeper(struct rq *rq, struct task_struct *p, trace_sched_stat_blocked(p, delta); - /* - * Blocking time is in units of nanosecs, so shift by - * 20 to get a milliseconds-range estimation of the - * amount of time that the task spent sleeping: - */ - if (unlikely(prof_on == SLEEP_PROFILING)) { - profile_hits(SLEEP_PROFILING, - (void *)get_wchan(p), - delta >> 20); - } account_scheduler_latency(p, delta >> 10, 0); } } diff --git a/kernel/task_work.c b/kernel/task_work.c index 5c2daa7ad3f906aefa5a521b36ce49d30257725d..5d14d639ac71b54a24dac49acd02f6c2c7e58e04 100644 --- a/kernel/task_work.c +++ b/kernel/task_work.c @@ -6,12 +6,14 @@ static struct callback_head work_exited; /* all we need is ->next == NULL */ +#ifdef CONFIG_IRQ_WORK static void task_work_set_notify_irq(struct irq_work *entry) { test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); } static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = IRQ_WORK_INIT_HARD(task_work_set_notify_irq); +#endif /** * task_work_add - ask the @task to execute @work->func() @@ -57,6 +59,8 @@ int task_work_add(struct task_struct *task, struct callback_head *work, if (notify == TWA_NMI_CURRENT) { if (WARN_ON_ONCE(task != current)) return -EINVAL; + if (!IS_ENABLED(CONFIG_IRQ_WORK)) + return -EINVAL; } else { /* record the work call stack in order to print it in KASAN reports */ kasan_record_aux_stack(work); @@ -81,9 +85,11 @@ int task_work_add(struct task_struct *task, struct callback_head *work, case TWA_SIGNAL_NO_IPI: __set_notify_signal(task); break; +#ifdef CONFIG_IRQ_WORK case TWA_NMI_CURRENT: irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); break; +#endif default: WARN_ON_ONCE(1); break; diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index d25ba49e313cca182029e272b7bac004773f92a8..d0538a75f4c63ac1e9c8eb36f86eeb8f3b8e87d2 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -246,7 +246,7 @@ static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, wd_delay = cycles_to_nsec_safe(watchdog, *wdnow, wd_end); if (wd_delay <= WATCHDOG_MAX_SKEW) { - if (nretries > 1 || nretries >= max_retries) { + if (nretries > 1 && nretries >= max_retries) { pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n", smp_processor_id(), watchdog->name, nretries); } diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c index 406dccb79c2b6b5dde3c619e3107afd9925ab7ff..8d2dd214ec68227814a880cd5e1f73c6700c0d4e 100644 --- a/kernel/time/ntp.c +++ b/kernel/time/ntp.c @@ -727,17 +727,16 @@ static inline void process_adjtimex_modes(const struct __kernel_timex *txc, } if (txc->modes & ADJ_MAXERROR) - time_maxerror = txc->maxerror; + time_maxerror = clamp(txc->maxerror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_ESTERROR) - time_esterror = txc->esterror; + time_esterror = clamp(txc->esterror, 0, NTP_PHASE_LIMIT); if (txc->modes & ADJ_TIMECONST) { - time_constant = txc->constant; + time_constant = clamp(txc->constant, 0, MAXTC); if (!(time_status & STA_NANO)) time_constant += 4; - time_constant = min(time_constant, (long)MAXTC); - time_constant = max(time_constant, 0l); + time_constant = clamp(time_constant, 0, MAXTC); } if (txc->modes & ADJ_TAI && diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index b4843099a8da74ad79903e8ea9d59b260006f277..ed58eebb4e8f423cf0af169b17b09d94061cbb6a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -1141,7 +1141,6 @@ void tick_broadcast_switch_to_oneshot(void) #ifdef CONFIG_HOTPLUG_CPU void hotplug_cpu__broadcast_tick_pull(int deadcpu) { - struct tick_device *td = this_cpu_ptr(&tick_cpu_device); struct clock_event_device *bc; unsigned long flags; @@ -1167,6 +1166,8 @@ void hotplug_cpu__broadcast_tick_pull(int deadcpu) * device to avoid the starvation. */ if (tick_check_broadcast_expired()) { + struct tick_device *td = this_cpu_ptr(&tick_cpu_device); + cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); tick_program_event(td->evtdev->next_event, 1); } diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 2fa87dcfeda9c2f16b3613544ce10f363145d181..5391e4167d60226dfc48c845170e36bcbeb7b292 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -2606,7 +2606,7 @@ int do_adjtimex(struct __kernel_timex *txc) clock_set |= timekeeping_advance(TK_ADV_FREQ); if (clock_set) - clock_was_set(CLOCK_REALTIME); + clock_was_set(CLOCK_SET_WALL); ntp_notify_cmos_timer(); diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index fc205ad167a9b72b0f6440f9b0369732dc147fe5..d1d5ea2d0a1b706d1be876ae7172a4ada898b927 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -902,7 +902,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, i = *idx ? : task->curr_ret_stack; while (i > 0) { - ret_stack = get_ret_stack(current, i, &i); + ret_stack = get_ret_stack(task, i, &i); if (!ret_stack) break; /* diff --git a/kernel/trace/preemptirq_delay_test.c b/kernel/trace/preemptirq_delay_test.c index cb0871fbdb07f034465c51c59ef9ca06b84768de..314ffc143039c53da1c0612448901a8104cdd3d5 100644 --- a/kernel/trace/preemptirq_delay_test.c +++ b/kernel/trace/preemptirq_delay_test.c @@ -34,8 +34,6 @@ MODULE_PARM_DESC(cpu_affinity, "Cpu num test is running on"); static struct completion done; -#define MIN(x, y) ((x) < (y) ? (x) : (y)) - static void busy_wait(ulong time) { u64 start, end; diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 28853966aa9afff9fc6e9a6af90a3eec58902c60..cebd879a30cbd10b75945239d4eff05bd84538d5 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -692,18 +692,6 @@ u64 ring_buffer_event_time_stamp(struct trace_buffer *buffer, return ts; } -/** - * ring_buffer_nr_pages - get the number of buffer pages in the ring buffer - * @buffer: The ring_buffer to get the number of pages from - * @cpu: The cpu of the ring_buffer to get the number of pages from - * - * Returns the number of pages used by a per_cpu buffer of the ring buffer. - */ -size_t ring_buffer_nr_pages(struct trace_buffer *buffer, int cpu) -{ - return buffer->buffers[cpu]->nr_pages; -} - /** * ring_buffer_nr_dirty_pages - get the number of used pages in the ring buffer * @buffer: The ring_buffer to get the number of pages from diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 10cd38bce2f1c47ca894782aa1265f8d715445f8..ebe7ce2f5f4a50f9402da8978e89b7db66592079 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7956,7 +7956,7 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, trace_access_unlock(iter->cpu_file); if (ret < 0) { - if (trace_empty(iter)) { + if (trace_empty(iter) && !iter->closed) { if ((filp->f_flags & O_NONBLOCK)) return -EAGAIN; diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 8783bebd0562e51aa572759746111efcbff9157f..bd3e3069300e267c2a0cb5a5e0e2e56ce1f2de83 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -1634,6 +1634,29 @@ static inline void *event_file_data(struct file *filp) extern struct mutex event_mutex; extern struct list_head ftrace_events; +/* + * When the trace_event_file is the filp->i_private pointer, + * it must be taken under the event_mutex lock, and then checked + * if the EVENT_FILE_FL_FREED flag is set. If it is, then the + * data pointed to by the trace_event_file can not be trusted. + * + * Use the event_file_file() to access the trace_event_file from + * the filp the first time under the event_mutex and check for + * NULL. If it is needed to be retrieved again and the event_mutex + * is still held, then the event_file_data() can be used and it + * is guaranteed to be valid. + */ +static inline struct trace_event_file *event_file_file(struct file *filp) +{ + struct trace_event_file *file; + + lockdep_assert_held(&event_mutex); + file = READ_ONCE(file_inode(filp)->i_private); + if (!file || file->flags & EVENT_FILE_FL_FREED) + return NULL; + return file; +} + extern const struct file_operations event_trigger_fops; extern const struct file_operations event_hist_fops; extern const struct file_operations event_hist_debug_fops; diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index 6ef29eba90ceb7e06774171d79c7ea244ecaa72a..7266ec2a4eea0016de17676edb17b4d08af4fe41 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -992,18 +992,18 @@ static void remove_subsystem(struct trace_subsystem_dir *dir) void event_file_get(struct trace_event_file *file) { - atomic_inc(&file->ref); + refcount_inc(&file->ref); } void event_file_put(struct trace_event_file *file) { - if (WARN_ON_ONCE(!atomic_read(&file->ref))) { + if (WARN_ON_ONCE(!refcount_read(&file->ref))) { if (file->flags & EVENT_FILE_FL_FREED) kmem_cache_free(file_cachep, file); return; } - if (atomic_dec_and_test(&file->ref)) { + if (refcount_dec_and_test(&file->ref)) { /* Count should only go to zero when it is freed */ if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED))) return; @@ -1386,12 +1386,12 @@ event_enable_read(struct file *filp, char __user *ubuf, size_t cnt, char buf[4] = "0"; mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (likely(file)) flags = file->flags; mutex_unlock(&event_mutex); - if (!file || flags & EVENT_FILE_FL_FREED) + if (!file) return -ENODEV; if (flags & EVENT_FILE_FL_ENABLED && @@ -1424,8 +1424,8 @@ event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt, case 1: ret = -ENODEV; mutex_lock(&event_mutex); - file = event_file_data(filp); - if (likely(file && !(file->flags & EVENT_FILE_FL_FREED))) { + file = event_file_file(filp); + if (likely(file)) { ret = tracing_update_buffers(file->tr); if (ret < 0) { mutex_unlock(&event_mutex); @@ -1540,7 +1540,8 @@ enum { static void *f_next(struct seq_file *m, void *v, loff_t *pos) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct list_head *common_head = &ftrace_common_fields; struct list_head *head = trace_get_fields(call); struct list_head *node = v; @@ -1572,7 +1573,8 @@ static void *f_next(struct seq_file *m, void *v, loff_t *pos) static int f_show(struct seq_file *m, void *v) { - struct trace_event_call *call = event_file_data(m->private); + struct trace_event_file *file = event_file_data(m->private); + struct trace_event_call *call = file->event_call; struct ftrace_event_field *field; const char *array_descriptor; @@ -1627,12 +1629,14 @@ static int f_show(struct seq_file *m, void *v) static void *f_start(struct seq_file *m, loff_t *pos) { + struct trace_event_file *file; void *p = (void *)FORMAT_HEADER; loff_t l = 0; /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - if (!event_file_data(m->private)) + file = event_file_file(m->private); + if (!file) return ERR_PTR(-ENODEV); while (l < *pos && p) @@ -1706,8 +1710,8 @@ event_filter_read(struct file *filp, char __user *ubuf, size_t cnt, trace_seq_init(s); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file && !(file->flags & EVENT_FILE_FL_FREED)) + file = event_file_file(filp); + if (file) print_event_filter(file, s); mutex_unlock(&event_mutex); @@ -1736,9 +1740,13 @@ event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt, return PTR_ERR(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); - if (file) - err = apply_event_filter(file, buf); + file = event_file_file(filp); + if (file) { + if (file->flags & EVENT_FILE_FL_FREED) + err = -ENODEV; + else + err = apply_event_filter(file, buf); + } mutex_unlock(&event_mutex); kfree(buf); @@ -2485,7 +2493,6 @@ static int event_callback(const char *name, umode_t *mode, void **data, if (strcmp(name, "format") == 0) { *mode = TRACE_MODE_READ; *fops = &ftrace_event_format_fops; - *data = call; return 1; } @@ -2996,7 +3003,7 @@ trace_create_new_event(struct trace_event_call *call, atomic_set(&file->tm_ref, 0); INIT_LIST_HEAD(&file->triggers); list_add(&file->list, &tr->events); - event_file_get(file); + refcount_set(&file->ref, 1); return file; } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 6ece1308d36a02dec5af3ca3cebdb9d6b427aac7..5f9119eb7c67f6e1e11fcf2c8a37c311e1a0372d 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -5601,7 +5601,7 @@ static int hist_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; @@ -5880,7 +5880,7 @@ static int hist_debug_show(struct seq_file *m, void *v) mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) { ret = -ENODEV; goto out_unlock; diff --git a/kernel/trace/trace_events_inject.c b/kernel/trace/trace_events_inject.c index 8650562bdaa9886e59a519aec516fc4b9209f0b5..a8f076809db4d58978847c7ad3f161dac220cf09 100644 --- a/kernel/trace/trace_events_inject.c +++ b/kernel/trace/trace_events_inject.c @@ -299,7 +299,7 @@ event_inject_write(struct file *filp, const char __user *ubuf, size_t cnt, strim(buf); mutex_lock(&event_mutex); - file = event_file_data(filp); + file = event_file_file(filp); if (file) { call = file->event_call; size = parse_entry(buf, call, &entry); diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 4bec043c8690d382f8c493721e0f6e7a1ce1a91a..a5e3d6acf1e1e241ffeef791b9281c47840dfaea 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -159,7 +159,7 @@ static void *trigger_start(struct seq_file *m, loff_t *pos) /* ->stop() is called even if ->start() fails */ mutex_lock(&event_mutex); - event_file = event_file_data(m->private); + event_file = event_file_file(m->private); if (unlikely(!event_file)) return ERR_PTR(-ENODEV); @@ -213,7 +213,7 @@ static int event_trigger_regex_open(struct inode *inode, struct file *file) mutex_lock(&event_mutex); - if (unlikely(!event_file_data(file))) { + if (unlikely(!event_file_file(file))) { mutex_unlock(&event_mutex); return -ENODEV; } @@ -293,7 +293,7 @@ static ssize_t event_trigger_regex_write(struct file *file, strim(buf); mutex_lock(&event_mutex); - event_file = event_file_data(file); + event_file = event_file_file(file); if (unlikely(!event_file)) { mutex_unlock(&event_mutex); kfree(buf); diff --git a/kernel/trace/tracing_map.c b/kernel/trace/tracing_map.c index a4dcf0f2435213bc2b2b91d677ec18290aa53859..3a56e7c8aa4f67281e57bb6538dc961c4b65c670 100644 --- a/kernel/trace/tracing_map.c +++ b/kernel/trace/tracing_map.c @@ -454,7 +454,7 @@ static struct tracing_map_elt *get_free_elt(struct tracing_map *map) struct tracing_map_elt *elt = NULL; int idx; - idx = atomic_inc_return(&map->next_elt); + idx = atomic_fetch_add_unless(&map->next_elt, 1, map->max_elts); if (idx < map->max_elts) { elt = *(TRACING_MAP_ELT(map->elts, idx)); if (map->ops && map->ops->elt_init) @@ -699,7 +699,7 @@ void tracing_map_clear(struct tracing_map *map) { unsigned int i; - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); atomic64_set(&map->hits, 0); atomic64_set(&map->drops, 0); @@ -783,7 +783,7 @@ struct tracing_map *tracing_map_create(unsigned int map_bits, map->map_bits = map_bits; map->max_elts = (1 << map_bits); - atomic_set(&map->next_elt, -1); + atomic_set(&map->next_elt, 0); map->map_size = (1 << (map_bits + 1)); map->ops = ops; diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1745ca788ede3e51ee09c3db11ff9e93ac81caf8..e7b005ff375033f1bf7e67ac6b69f08bd3a888bb 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -377,7 +377,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ - struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */ + struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */ }; @@ -897,7 +897,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits) { - return (v >> shift) & ((1 << bits) - 1); + return (v >> shift) & ((1U << bits) - 1); } static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data) @@ -3351,7 +3351,6 @@ woke_up: set_pf_worker(false); ida_free(&pool->worker_ida, worker->id); - WARN_ON_ONCE(!list_empty(&worker->entry)); return 0; } @@ -4167,7 +4166,6 @@ already_gone: static bool __flush_work(struct work_struct *work, bool from_cancel) { struct wq_barrier barr; - unsigned long data; if (WARN_ON(!wq_online)) return false; @@ -4185,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) * was queued on a BH workqueue, we also know that it was running in the * BH context and thus can be busy-waited. */ - data = *work_data_bits(work); - if (from_cancel && - !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { - /* - * On RT, prevent a live lock when %current preempted soft - * interrupt processing or prevents ksoftirqd from running by - * keeping flipping BH. If the BH work item runs on a different - * CPU then this has no effect other than doing the BH - * disable/enable dance for nothing. This is copied from - * kernel/softirq.c::tasklet_unlock_spin_wait(). - */ - while (!try_wait_for_completion(&barr.done)) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - local_bh_disable(); - local_bh_enable(); - } else { - cpu_relax(); + if (from_cancel) { + unsigned long data = *work_data_bits(work); + + if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && + (data & WORK_OFFQ_BH)) { + /* + * On RT, prevent a live lock when %current preempted + * soft interrupt processing or prevents ksoftirqd from + * running by keeping flipping BH. If the BH work item + * runs on a different CPU then this has no effect other + * than doing the BH disable/enable dance for nothing. + * This is copied from + * kernel/softirq.c::tasklet_unlock_spin_wait(). + */ + while (!try_wait_for_completion(&barr.done)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } } + goto out_destroy; } - } else { - wait_for_completion(&barr.done); } + wait_for_completion(&barr.done); + +out_destroy: destroy_work_on_stack(&barr.work); return true; } diff --git a/lib/btree.c b/lib/btree.c index 49420cae3a8330ac650ff3114ee98cb2ed8e088a..bb81d3393ac5c52cd2a80e2712380e6dfe8b8e47 100644 --- a/lib/btree.c +++ b/lib/btree.c @@ -43,7 +43,6 @@ #include #include -#define MAX(a, b) ((a) > (b) ? (a) : (b)) #define NODESIZE MAX(L1_CACHE_BYTES, 128) struct btree_geo { diff --git a/lib/decompress_unlzma.c b/lib/decompress_unlzma.c index 20a858031f12b7600f906a90584d73aa21e2311e..9d34d35908daa89c74bf4e53864b49e4434e18c5 100644 --- a/lib/decompress_unlzma.c +++ b/lib/decompress_unlzma.c @@ -37,7 +37,9 @@ #include +#ifndef MIN #define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#endif static long long INIT read_int(unsigned char *ptr, int size) { diff --git a/lib/generic-radix-tree.c b/lib/generic-radix-tree.c index aaefb9b678c8e4e1277411a27c554604686d3ddc..fa692c86f06969239bd501bd30a4723d20e2b291 100644 --- a/lib/generic-radix-tree.c +++ b/lib/generic-radix-tree.c @@ -121,6 +121,8 @@ void *__genradix_ptr_alloc(struct __genradix *radix, size_t offset, if ((v = cmpxchg_release(&radix->root, r, new_root)) == r) { v = new_root; new_node = NULL; + } else { + new_node->children[0] = NULL; } } diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c index f314a0c15a6d83d8fff4a71b71347fba6da112f2..2abc78367dd110cf8aaaa86190ee9a6248786441 100644 --- a/lib/overflow_kunit.c +++ b/lib/overflow_kunit.c @@ -668,7 +668,6 @@ DEFINE_TEST_ALLOC(devm_kzalloc, devm_kfree, 1, 1, 0); static void overflow_allocation_test(struct kunit *test) { - const char device_name[] = "overflow-test"; struct device *dev; int count = 0; @@ -678,7 +677,7 @@ static void overflow_allocation_test(struct kunit *test) } while (0) /* Create dummy device for devm_kmalloc()-family tests. */ - dev = kunit_device_register(test, device_name); + dev = kunit_device_register(test, "overflow-test"); KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev), "Cannot register test device\n"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index cdd4e2314bfc6ed6800ae4ed7ebcb4e5432b20bb..2d71b11159161b122bceb4131c46ca9ac37f282f 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1080,7 +1080,7 @@ char *resource_string(char *buf, char *end, struct resource *res, #define FLAG_BUF_SIZE (2 * sizeof(res->flags)) #define DECODED_BUF_SIZE sizeof("[mem - 64bit pref window disabled]") #define RAW_BUF_SIZE sizeof("[mem - flags 0x]") - char sym[max(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, + char sym[MAX(2*RSRC_BUF_SIZE + DECODED_BUF_SIZE, 2*RSRC_BUF_SIZE + FLAG_BUF_SIZE + RAW_BUF_SIZE)]; char *p = sym, *pend = sym + sizeof(sym); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f4be468e06a49a7ad25e5ea3af0e74fcf1c2a678..67c86a5d64a6a9825092c54ba801c91fa30121db 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1685,7 +1685,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { spin_unlock(vmf->ptl); - goto out; + return 0; } pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1728,22 +1728,16 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { flags |= TNF_MIGRATED; nid = target_nid; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { - spin_unlock(vmf->ptl); - goto out; - } - goto out_map; - } - -out: - if (nid != NUMA_NO_NODE) task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; + } - return 0; - + flags |= TNF_MIGRATE_FAIL; + vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); + if (unlikely(!pmd_same(oldpmd, *vmf->pmd))) { + spin_unlock(vmf->ptl); + return 0; + } out_map: /* Restore the PMD */ pmd = pmd_modify(oldpmd, vma->vm_page_prot); @@ -1753,7 +1747,10 @@ out_map: set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd); update_mmu_cache_pmd(vma, vmf->address, vmf->pmd); spin_unlock(vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, HPAGE_PMD_NR, flags); + return 0; } /* diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index 829112b0a914c9aacf3e5aab38e144a1eb7d3f40..0c3f56b3578eba44f25870fd8bd6f25ee8bdf9cb 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -185,11 +185,11 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end, static inline void free_vmemmap_page(struct page *page) { if (PageReserved(page)) { + memmap_boot_pages_add(-1); free_bootmem_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP_BOOT, -1); } else { + memmap_pages_add(-1); __free_page(page); - mod_node_page_state(page_pgdat(page), NR_MEMMAP, -1); } } @@ -341,7 +341,7 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end, copy_page(page_to_virt(walk.reuse_page), (void *)walk.reuse_addr); list_add(&walk.reuse_page->lru, vmemmap_pages); - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, 1); + memmap_pages_add(1); } /* @@ -392,14 +392,11 @@ static int alloc_vmemmap_page_list(unsigned long start, unsigned long end, for (i = 0; i < nr_pages; i++) { page = alloc_pages_node(nid, gfp_mask, 0); - if (!page) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, i); + if (!page) goto out; - } list_add(&page->lru, list); } - - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, nr_pages); + memmap_pages_add(nr_pages); return 0; out: diff --git a/mm/list_lru.c b/mm/list_lru.c index a29d96929d7c7e17138995319ca511e231d1b582..9b7ff06e9d326bb0da9f486da6c05a56c77d786c 100644 --- a/mm/list_lru.c +++ b/mm/list_lru.c @@ -85,6 +85,7 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx) } #endif /* CONFIG_MEMCG */ +/* The caller must ensure the memcg lifetime. */ bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -109,14 +110,22 @@ EXPORT_SYMBOL_GPL(list_lru_add); bool list_lru_add_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_add(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_add(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_add(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_add_obj); +/* The caller must ensure the memcg lifetime. */ bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid, struct mem_cgroup *memcg) { @@ -139,11 +148,18 @@ EXPORT_SYMBOL_GPL(list_lru_del); bool list_lru_del_obj(struct list_lru *lru, struct list_head *item) { + bool ret; int nid = page_to_nid(virt_to_page(item)); - struct mem_cgroup *memcg = list_lru_memcg_aware(lru) ? - mem_cgroup_from_slab_obj(item) : NULL; - return list_lru_del(lru, item, nid, memcg); + if (list_lru_memcg_aware(lru)) { + rcu_read_lock(); + ret = list_lru_del(lru, item, nid, mem_cgroup_from_slab_obj(item)); + rcu_read_unlock(); + } else { + ret = list_lru_del(lru, item, nid, NULL); + } + + return ret; } EXPORT_SYMBOL_GPL(list_lru_del_obj); diff --git a/mm/memcontrol-v1.c b/mm/memcontrol-v1.c index 2aeea4d8bf8e53b24748b431c16b4b72af22e923..417c96f2da28e8c3984792903d5daed1786a9e2b 100644 --- a/mm/memcontrol-v1.c +++ b/mm/memcontrol-v1.c @@ -1842,9 +1842,12 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of, buf = endp + 1; cfd = simple_strtoul(buf, &endp, 10); - if ((*endp != ' ') && (*endp != '\0')) + if (*endp == '\0') + buf = endp; + else if (*endp == ' ') + buf = endp + 1; + else return -EINVAL; - buf = endp + 1; event = kzalloc(sizeof(*event), GFP_KERNEL); if (!event) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 960371788687720de58cdf146b900d6efb66c04c..f29157288b7dd68a51ebf19f2d361aa66132d675 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3386,11 +3386,28 @@ static void memcg_wb_domain_size_changed(struct mem_cgroup *memcg) #define MEM_CGROUP_ID_MAX ((1UL << MEM_CGROUP_ID_SHIFT) - 1) static DEFINE_IDR(mem_cgroup_idr); +static DEFINE_SPINLOCK(memcg_idr_lock); + +static int mem_cgroup_alloc_id(void) +{ + int ret; + + idr_preload(GFP_KERNEL); + spin_lock(&memcg_idr_lock); + ret = idr_alloc(&mem_cgroup_idr, NULL, 1, MEM_CGROUP_ID_MAX + 1, + GFP_NOWAIT); + spin_unlock(&memcg_idr_lock); + idr_preload_end(); + return ret; +} static void mem_cgroup_id_remove(struct mem_cgroup *memcg) { if (memcg->id.id > 0) { + spin_lock(&memcg_idr_lock); idr_remove(&mem_cgroup_idr, memcg->id.id); + spin_unlock(&memcg_idr_lock); + memcg->id.id = 0; } } @@ -3524,8 +3541,7 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent) if (!memcg) return ERR_PTR(error); - memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL, - 1, MEM_CGROUP_ID_MAX + 1, GFP_KERNEL); + memcg->id.id = mem_cgroup_alloc_id(); if (memcg->id.id < 0) { error = memcg->id.id; goto fail; @@ -3667,7 +3683,9 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css) * publish it here at the end of onlining. This matches the * regular ID destruction during offlining. */ + spin_lock(&memcg_idr_lock); idr_replace(&mem_cgroup_idr, memcg, memcg->id.id); + spin_unlock(&memcg_idr_lock); return 0; offline_kmem: diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 581d3e5c911758d09719f457ffa2fd65d953d222..7066fc84f35172a55030addda85eddbb3c9828d8 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -2417,7 +2417,7 @@ struct memory_failure_entry { struct memory_failure_cpu { DECLARE_KFIFO(fifo, struct memory_failure_entry, MEMORY_FAILURE_FIFO_SIZE); - spinlock_t lock; + raw_spinlock_t lock; struct work_struct work; }; @@ -2443,20 +2443,22 @@ void memory_failure_queue(unsigned long pfn, int flags) { struct memory_failure_cpu *mf_cpu; unsigned long proc_flags; + bool buffer_overflow; struct memory_failure_entry entry = { .pfn = pfn, .flags = flags, }; mf_cpu = &get_cpu_var(memory_failure_cpu); - spin_lock_irqsave(&mf_cpu->lock, proc_flags); - if (kfifo_put(&mf_cpu->fifo, entry)) + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); + buffer_overflow = !kfifo_put(&mf_cpu->fifo, entry); + if (!buffer_overflow) schedule_work_on(smp_processor_id(), &mf_cpu->work); - else + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + put_cpu_var(memory_failure_cpu); + if (buffer_overflow) pr_err("buffer overflow when queuing memory failure at %#lx\n", pfn); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); - put_cpu_var(memory_failure_cpu); } EXPORT_SYMBOL_GPL(memory_failure_queue); @@ -2469,9 +2471,9 @@ static void memory_failure_work_func(struct work_struct *work) mf_cpu = container_of(work, struct memory_failure_cpu, work); for (;;) { - spin_lock_irqsave(&mf_cpu->lock, proc_flags); + raw_spin_lock_irqsave(&mf_cpu->lock, proc_flags); gotten = kfifo_get(&mf_cpu->fifo, &entry); - spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); + raw_spin_unlock_irqrestore(&mf_cpu->lock, proc_flags); if (!gotten) break; if (entry.flags & MF_SOFT_OFFLINE) @@ -2501,7 +2503,7 @@ static int __init memory_failure_init(void) for_each_possible_cpu(cpu) { mf_cpu = &per_cpu(memory_failure_cpu, cpu); - spin_lock_init(&mf_cpu->lock); + raw_spin_lock_init(&mf_cpu->lock); INIT_KFIFO(mf_cpu->fifo); INIT_WORK(&mf_cpu->work, memory_failure_work_func); } diff --git a/mm/memory.c b/mm/memory.c index 34f8402d2046f366a99fe072641c6e05c1d3e618..3c01d68065be21a45d8b9085d7f81785bc5cbfd8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -5295,7 +5295,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (unlikely(!pte_same(old_pte, vmf->orig_pte))) { pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + return 0; } pte = pte_modify(old_pte, vma->vm_page_prot); @@ -5358,23 +5358,19 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) if (!migrate_misplaced_folio(folio, vma, target_nid)) { nid = target_nid; flags |= TNF_MIGRATED; - } else { - flags |= TNF_MIGRATE_FAIL; - vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, - vmf->address, &vmf->ptl); - if (unlikely(!vmf->pte)) - goto out; - if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { - pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; - } - goto out_map; + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } -out: - if (nid != NUMA_NO_NODE) - task_numa_fault(last_cpupid, nid, nr_pages, flags); - return 0; + flags |= TNF_MIGRATE_FAIL; + vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, + vmf->address, &vmf->ptl); + if (unlikely(!vmf->pte)) + return 0; + if (unlikely(!pte_same(ptep_get(vmf->pte), vmf->orig_pte))) { + pte_unmap_unlock(vmf->pte, vmf->ptl); + return 0; + } out_map: /* * Make it present again, depending on how arch implements @@ -5387,7 +5383,10 @@ out_map: numa_rebuild_single_mapping(vmf, vma, vmf->address, vmf->pte, writable); pte_unmap_unlock(vmf->pte, vmf->ptl); - goto out; + + if (nid != NUMA_NO_NODE) + task_numa_fault(last_cpupid, nid, nr_pages, flags); + return 0; } static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf) diff --git a/mm/migrate.c b/mm/migrate.c index e7296c0fb5d57a2ee8585fca9e0a031be090f97f..923ea80ba7442b82c58f4a1680a77562ee154f98 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1479,11 +1479,17 @@ out: return rc; } -static inline int try_split_folio(struct folio *folio, struct list_head *split_folios) +static inline int try_split_folio(struct folio *folio, struct list_head *split_folios, + enum migrate_mode mode) { int rc; - folio_lock(folio); + if (mode == MIGRATE_ASYNC) { + if (!folio_trylock(folio)) + return -EAGAIN; + } else { + folio_lock(folio); + } rc = split_folio_to_list(folio, split_folios); folio_unlock(folio); if (!rc) @@ -1677,7 +1683,7 @@ static int migrate_pages_batch(struct list_head *from, */ if (nr_pages > 2 && !list_empty(&folio->_deferred_list)) { - if (try_split_folio(folio, split_folios) == 0) { + if (!try_split_folio(folio, split_folios, mode)) { nr_failed++; stats->nr_thp_failed += is_thp; stats->nr_thp_split += is_thp; @@ -1699,7 +1705,7 @@ static int migrate_pages_batch(struct list_head *from, if (!thp_migration_supported() && is_thp) { nr_failed++; stats->nr_thp_failed++; - if (!try_split_folio(folio, split_folios)) { + if (!try_split_folio(folio, split_folios, mode)) { stats->nr_thp_split++; stats->nr_split++; continue; @@ -1731,7 +1737,7 @@ static int migrate_pages_batch(struct list_head *from, stats->nr_thp_failed += is_thp; /* Large folio NUMA faulting doesn't split to retry. */ if (is_large && !nosplit) { - int ret = try_split_folio(folio, split_folios); + int ret = try_split_folio(folio, split_folios, mode); if (!ret) { stats->nr_thp_split += is_thp; diff --git a/mm/mm_init.c b/mm/mm_init.c index 75c3bd42799b934148c97c259f4180e7a4fb2f0c..51960079875baad0fee3005269fe59e2a5f74d68 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1623,8 +1623,7 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) panic("Failed to allocate %ld bytes for node %d memory map\n", size, pgdat->node_id); pgdat->node_mem_map = map + offset; - mod_node_early_perpage_metadata(pgdat->node_id, - DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", __func__, pgdat->node_id, (unsigned long)pgdat, (unsigned long)pgdat->node_mem_map); @@ -2245,6 +2244,8 @@ void __init init_cma_reserved_pageblock(struct page *page) set_pageblock_migratetype(page, MIGRATE_CMA); set_page_refcounted(page); + /* pages were reserved and not allocated */ + clear_page_tag_ref(page); __free_pages(page, pageblock_order); adjust_managed_page_count(page, pageblock_nr_pages); @@ -2460,15 +2461,7 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn, } /* pages were reserved and not allocated */ - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } - + clear_page_tag_ref(page); __free_pages_core(page, order, MEMINIT_EARLY); } diff --git a/mm/mseal.c b/mm/mseal.c index bf783bba8ed0b92ba80fac5b3459d9bfe0ceb4b4..15bba28acc005fdd8725f4dda8937bbf364433c2 100644 --- a/mm/mseal.c +++ b/mm/mseal.c @@ -40,9 +40,17 @@ static bool can_modify_vma(struct vm_area_struct *vma) static bool is_madv_discard(int behavior) { - return behavior & - (MADV_FREE | MADV_DONTNEED | MADV_DONTNEED_LOCKED | - MADV_REMOVE | MADV_DONTFORK | MADV_WIPEONFORK); + switch (behavior) { + case MADV_FREE: + case MADV_DONTNEED: + case MADV_DONTNEED_LOCKED: + case MADV_REMOVE: + case MADV_DONTFORK: + case MADV_WIPEONFORK: + return true; + } + + return false; } static bool is_ro_anon(struct vm_area_struct *vma) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 28f80daf5c0418163fd111052e61b24ab2a7ba41..c565de8f48e9d8cd10bfb53988373350f9c98ec3 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -287,7 +287,7 @@ EXPORT_SYMBOL(nr_online_nodes); static bool page_contains_unaccepted(struct page *page, unsigned int order); static void accept_page(struct page *page, unsigned int order); -static bool try_to_accept_memory(struct zone *zone, unsigned int order); +static bool cond_accept_memory(struct zone *zone, unsigned int order); static inline bool has_unaccepted_memory(void); static bool __free_unaccepted(struct page *page); @@ -3072,9 +3072,6 @@ static inline long __zone_watermark_unusable_free(struct zone *z, if (!(alloc_flags & ALLOC_CMA)) unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES); #endif -#ifdef CONFIG_UNACCEPTED_MEMORY - unusable_free += zone_page_state(z, NR_UNACCEPTED); -#endif return unusable_free; } @@ -3368,6 +3365,8 @@ retry: } } + cond_accept_memory(zone, order); + /* * Detect whether the number of free pages is below high * watermark. If so, we will decrease pcp->high and free @@ -3393,10 +3392,8 @@ check_alloc_wmark: gfp_mask)) { int ret; - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* @@ -3450,10 +3447,8 @@ try_this_zone: return page; } else { - if (has_unaccepted_memory()) { - if (try_to_accept_memory(zone, order)) - goto try_this_zone; - } + if (cond_accept_memory(zone, order)) + goto try_this_zone; #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT /* Try again if zone has deferred pages */ @@ -5755,7 +5750,6 @@ void __init setup_per_cpu_pageset(void) for_each_online_pgdat(pgdat) pgdat->per_cpu_nodestats = alloc_percpu(struct per_cpu_nodestat); - store_early_perpage_metadata(); } __meminit void zone_pcp_init(struct zone *zone) @@ -5821,14 +5815,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char void free_reserved_page(struct page *page) { - if (mem_alloc_profiling_enabled()) { - union codetag_ref *ref = get_page_tag_ref(page); - - if (ref) { - set_codetag_empty(ref); - put_page_tag_ref(ref); - } - } + clear_page_tag_ref(page); ClearPageReserved(page); init_page_count(page); __free_page(page); @@ -6951,9 +6938,6 @@ static bool try_to_accept_memory_one(struct zone *zone) struct page *page; bool last; - if (list_empty(&zone->unaccepted_pages)) - return false; - spin_lock_irqsave(&zone->lock, flags); page = list_first_entry_or_null(&zone->unaccepted_pages, struct page, lru); @@ -6979,23 +6963,29 @@ static bool try_to_accept_memory_one(struct zone *zone) return true; } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { long to_accept; - int ret = false; + bool ret = false; + + if (!has_unaccepted_memory()) + return false; + + if (list_empty(&zone->unaccepted_pages)) + return false; /* How much to accept to get to high watermark? */ to_accept = high_wmark_pages(zone) - (zone_page_state(zone, NR_FREE_PAGES) - - __zone_watermark_unusable_free(zone, order, 0)); + __zone_watermark_unusable_free(zone, order, 0) - + zone_page_state(zone, NR_UNACCEPTED)); - /* Accept at least one page */ - do { + while (to_accept > 0) { if (!try_to_accept_memory_one(zone)) break; ret = true; to_accept -= MAX_ORDER_NR_PAGES; - } while (to_accept > 0); + } return ret; } @@ -7038,7 +7028,7 @@ static void accept_page(struct page *page, unsigned int order) { } -static bool try_to_accept_memory(struct zone *zone, unsigned int order) +static bool cond_accept_memory(struct zone *zone, unsigned int order) { return false; } diff --git a/mm/page_ext.c b/mm/page_ext.c index c191e490c401ba1165de0d89351b3c1c3ac65a1b..641d93f6af4c1e2563aae6885a1e098bf1060bc8 100644 --- a/mm/page_ext.c +++ b/mm/page_ext.c @@ -214,8 +214,7 @@ static int __init alloc_node_page_ext(int nid) return -ENOMEM; NODE_DATA(nid)->node_page_ext = base; total_usage += table_size; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - DIV_ROUND_UP(table_size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(table_size, PAGE_SIZE)); return 0; } @@ -275,10 +274,8 @@ static void *__meminit alloc_page_ext(size_t size, int nid) else addr = vzalloc_node(size, nid); - if (addr) { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(size, PAGE_SIZE)); - } + if (addr) + memmap_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); return addr; } @@ -323,25 +320,18 @@ static void free_page_ext(void *addr) { size_t table_size; struct page *page; - struct pglist_data *pgdat; table_size = page_ext_size * PAGES_PER_SECTION; + memmap_pages_add(-1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); if (is_vmalloc_addr(addr)) { - page = vmalloc_to_page(addr); - pgdat = page_pgdat(page); vfree(addr); } else { page = virt_to_page(addr); - pgdat = page_pgdat(page); BUG_ON(PageReserved(page)); kmemleak_free(addr); free_pages_exact(addr, table_size); } - - mod_node_page_state(pgdat, NR_MEMMAP, - -1L * (DIV_ROUND_UP(table_size, PAGE_SIZE))); - } static void __free_page_ext(unsigned long pfn) diff --git a/mm/shmem.c b/mm/shmem.c index 2faa9daaf54baaf853460fcd424d6bfe5fb1a056..5a77acf6ac6a621dc7b5e7b46402b2b714b45bea 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1629,11 +1629,6 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, unsigned long mask = READ_ONCE(huge_shmem_orders_always); unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size); unsigned long vm_flags = vma->vm_flags; - /* - * Check all the (large) orders below HPAGE_PMD_ORDER + 1 that - * are enabled for this vma. - */ - unsigned long orders = BIT(PMD_ORDER + 1) - 1; loff_t i_size; int order; @@ -1678,7 +1673,7 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode, if (global_huge) mask |= READ_ONCE(huge_shmem_orders_inherit); - return orders & mask; + return THP_ORDERS_ALL_FILE_DEFAULT & mask; } static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault *vmf, @@ -1686,6 +1681,7 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault unsigned long orders) { struct vm_area_struct *vma = vmf->vma; + pgoff_t aligned_index; unsigned long pages; int order; @@ -1697,9 +1693,9 @@ static unsigned long shmem_suitable_orders(struct inode *inode, struct vm_fault order = highest_order(orders); while (orders) { pages = 1UL << order; - index = round_down(index, pages); - if (!xa_find(&mapping->i_pages, &index, - index + pages - 1, XA_PRESENT)) + aligned_index = round_down(index, pages); + if (!xa_find(&mapping->i_pages, &aligned_index, + aligned_index + pages - 1, XA_PRESENT)) break; order = next_order(&orders, order); } diff --git a/mm/slub.c b/mm/slub.c index 3520acaf9afaf356e7bc149a9ca6ea25340d3dec..c9d8a2497fd65e1f48cd933a67bf75ab8f9c117b 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -4690,6 +4690,9 @@ static void __kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p) if (!df.slab) continue; + if (kfence_free(df.freelist)) + continue; + do_slab_free(df.s, df.slab, df.freelist, df.tail, df.cnt, _RET_IP_); } while (likely(size)); diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 1dda6c53370b0a6aca1620ee96f9ae448349ea85..edcc7a6b0f6f2072e730e45a43a96eefded18302 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -469,13 +469,10 @@ struct page * __meminit __populate_section_memmap(unsigned long pfn, if (r < 0) return NULL; - if (system_state == SYSTEM_BOOTING) { - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(end - start, - PAGE_SIZE)); - } else { - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP, - DIV_ROUND_UP(end - start, PAGE_SIZE)); - } + if (system_state == SYSTEM_BOOTING) + memmap_boot_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); + else + memmap_pages_add(DIV_ROUND_UP(end - start, PAGE_SIZE)); return pfn_to_page(pfn); } diff --git a/mm/sparse.c b/mm/sparse.c index e4b830091d137387c1a18536e494a8f2db8f8c58..0f018c6f9ec52f324237bd9b671f39f3a22e3287 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -463,7 +463,7 @@ static void __init sparse_buffer_init(unsigned long size, int nid) sparsemap_buf = memmap_alloc(size, section_map_size(), addr, nid, true); sparsemap_buf_end = sparsemap_buf + size; #ifndef CONFIG_SPARSEMEM_VMEMMAP - mod_node_early_perpage_metadata(nid, DIV_ROUND_UP(size, PAGE_SIZE)); + memmap_boot_pages_add(DIV_ROUND_UP(size, PAGE_SIZE)); #endif } @@ -643,8 +643,7 @@ static void depopulate_section_memmap(unsigned long pfn, unsigned long nr_pages, unsigned long start = (unsigned long) pfn_to_page(pfn); unsigned long end = start + nr_pages * sizeof(struct page); - mod_node_page_state(page_pgdat(pfn_to_page(pfn)), NR_MEMMAP, - -1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); + memmap_pages_add(-1L * (DIV_ROUND_UP(end - start, PAGE_SIZE))); vmemmap_free(start, end, altmap); } static void free_map_bootmem(struct page *memmap) diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b783baf12a14323361c53196f8e2a2645dc1142..af2de36549d60807a8bea9ed9e08cc8c389656f5 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -3584,15 +3584,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid, page = alloc_pages_noprof(alloc_gfp, order); else page = alloc_pages_node_noprof(nid, alloc_gfp, order); - if (unlikely(!page)) { - if (!nofail) - break; - - /* fall back to the zero order allocations */ - alloc_gfp |= __GFP_NOFAIL; - order = 0; - continue; - } + if (unlikely(!page)) + break; /* * Higher order allocations must be able to be treated as diff --git a/mm/vmstat.c b/mm/vmstat.c index 04a1cb6cc6365a971c56882476f3b44d3cf6bd15..e875f2a4915f5f79df8a121270a30896ada043ff 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1033,6 +1033,24 @@ unsigned long node_page_state(struct pglist_data *pgdat, } #endif +/* + * Count number of pages "struct page" and "struct page_ext" consume. + * nr_memmap_boot_pages: # of pages allocated by boot allocator + * nr_memmap_pages: # of pages that were allocated by buddy allocator + */ +static atomic_long_t nr_memmap_boot_pages = ATOMIC_LONG_INIT(0); +static atomic_long_t nr_memmap_pages = ATOMIC_LONG_INIT(0); + +void memmap_boot_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_boot_pages); +} + +void memmap_pages_add(long delta) +{ + atomic_long_add(delta, &nr_memmap_pages); +} + #ifdef CONFIG_COMPACTION struct contig_page_info { @@ -1255,11 +1273,11 @@ const char * const vmstat_text[] = { "pgdemote_kswapd", "pgdemote_direct", "pgdemote_khugepaged", - "nr_memmap", - "nr_memmap_boot", - /* enum writeback_stat_item counters */ + /* system-wide enum vm_stat_item counters */ "nr_dirty_threshold", "nr_dirty_background_threshold", + "nr_memmap_pages", + "nr_memmap_boot_pages", #if defined(CONFIG_VM_EVENT_COUNTERS) || defined(CONFIG_MEMCG) /* enum vm_event_item counters */ @@ -1790,7 +1808,7 @@ static const struct seq_operations zoneinfo_op = { #define NR_VMSTAT_ITEMS (NR_VM_ZONE_STAT_ITEMS + \ NR_VM_NUMA_EVENT_ITEMS + \ NR_VM_NODE_STAT_ITEMS + \ - NR_VM_WRITEBACK_STAT_ITEMS + \ + NR_VM_STAT_ITEMS + \ (IS_ENABLED(CONFIG_VM_EVENT_COUNTERS) ? \ NR_VM_EVENT_ITEMS : 0)) @@ -1827,7 +1845,9 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos) global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD, v + NR_DIRTY_THRESHOLD); - v += NR_VM_WRITEBACK_STAT_ITEMS; + v[NR_MEMMAP_PAGES] = atomic_long_read(&nr_memmap_pages); + v[NR_MEMMAP_BOOT_PAGES] = atomic_long_read(&nr_memmap_boot_pages); + v += NR_VM_STAT_ITEMS; #ifdef CONFIG_VM_EVENT_COUNTERS all_vm_events(v); @@ -2285,25 +2305,3 @@ static int __init extfrag_debug_init(void) module_init(extfrag_debug_init); #endif - -/* - * Page metadata size (struct page and page_ext) in pages - */ -static unsigned long early_perpage_metadata[MAX_NUMNODES] __meminitdata; - -void __meminit mod_node_early_perpage_metadata(int nid, long delta) -{ - early_perpage_metadata[nid] += delta; -} - -void __meminit store_early_perpage_metadata(void) -{ - int nid; - struct pglist_data *pgdat; - - for_each_online_pgdat(pgdat) { - nid = pgdat->node_id; - mod_node_page_state(NODE_DATA(nid), NR_MEMMAP_BOOT, - early_perpage_metadata[nid]); - } -} diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 5d6581ab7c073d5c1cb933bcd8ef7991d4a77d5f..2d3163e4da96a565f7c2f8bcc6ac122ed64aa2f0 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -120,8 +120,6 @@ #define CLASS_BITS 8 #define MAGIC_VAL_BITS 8 -#define MAX(a, b) ((a) >= (b) ? (a) : (b)) - #define ZS_MAX_PAGES_PER_ZSPAGE (_AC(CONFIG_ZSMALLOC_CHAIN_SIZE, UL)) /* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */ diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 8a4ebd93adfcd7572f8e481d99be990efed3f153..f25a21f532aa7556ba2cb7963e5054a0783384a0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -119,13 +119,6 @@ void hci_discovery_set_state(struct hci_dev *hdev, int state) case DISCOVERY_STARTING: break; case DISCOVERY_FINDING: - /* If discovery was not started then it was initiated by the - * MGMT interface so no MGMT event shall be generated either - */ - if (old_state != DISCOVERY_STARTING) { - hdev->discovery.state = old_state; - return; - } mgmt_discovering(hdev, 1); break; case DISCOVERY_RESOLVING: @@ -3671,19 +3664,19 @@ static void hci_sched_le(struct hci_dev *hdev) { struct hci_chan *chan; struct sk_buff *skb; - int quote, cnt, tmp; + int quote, *cnt, tmp; BT_DBG("%s", hdev->name); if (!hci_conn_num(hdev, LE_LINK)) return; - cnt = hdev->le_pkts ? hdev->le_cnt : hdev->acl_cnt; + cnt = hdev->le_pkts ? &hdev->le_cnt : &hdev->acl_cnt; - __check_timeout(hdev, cnt, LE_LINK); + __check_timeout(hdev, *cnt, LE_LINK); - tmp = cnt; - while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { + tmp = *cnt; + while (*cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { u32 priority = (skb_peek(&chan->data_q))->priority; while (quote-- && (skb = skb_peek(&chan->data_q))) { BT_DBG("chan %p skb %p len %d priority %u", chan, skb, @@ -3698,7 +3691,7 @@ static void hci_sched_le(struct hci_dev *hdev) hci_send_frame(hdev, skb); hdev->le_last_tx = jiffies; - cnt--; + (*cnt)--; chan->sent++; chan->conn->sent++; @@ -3708,12 +3701,7 @@ static void hci_sched_le(struct hci_dev *hdev) } } - if (hdev->le_pkts) - hdev->le_cnt = cnt; - else - hdev->acl_cnt = cnt; - - if (cnt != tmp) + if (*cnt != tmp) hci_prio_recalculate(hdev, LE_LINK); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index dce8035ca7990cfbddbe6a8edf7163fcdad7ee7b..1c82dcdf6e8fc76e4a0a5e690c68ad9653c2942d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1721,9 +1721,10 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) switch (enable) { case LE_SCAN_ENABLE: hci_dev_set_flag(hdev, HCI_LE_SCAN); - if (hdev->le_scan_type == LE_SCAN_ACTIVE) + if (hdev->le_scan_type == LE_SCAN_ACTIVE) { clear_pending_adv_report(hdev); - hci_discovery_set_state(hdev, DISCOVERY_FINDING); + hci_discovery_set_state(hdev, DISCOVERY_FINDING); + } break; case LE_SCAN_DISABLE: @@ -5919,7 +5920,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, * while we have an existing one in peripheral role. */ if (hdev->conn_hash.le_num_peripheral > 0 && - (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) || + (test_bit(HCI_QUIRK_BROKEN_LE_STATES, &hdev->quirks) || !(hdev->le_states[3] & 0x10))) return NULL; diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index cd2ed16da8a466c3f03f99254790a76e041a2c42..e79cd40bd079df3848ebc04efa777b8b4d48373a 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -2976,6 +2976,27 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) */ filter_policy = hci_update_accept_list_sync(hdev); + /* If suspended and filter_policy set to 0x00 (no acceptlist) then + * passive scanning cannot be started since that would require the host + * to be woken up to process the reports. + */ + if (hdev->suspended && !filter_policy) { + /* Check if accept list is empty then there is no need to scan + * while suspended. + */ + if (list_empty(&hdev->le_accept_list)) + return 0; + + /* If there are devices is the accept_list that means some + * devices could not be programmed which in non-suspended case + * means filter_policy needs to be set to 0x00 so the host needs + * to filter, but since this is treating suspended case we + * can ignore device needing host to filter to allow devices in + * the acceptlist to be able to wakeup the system. + */ + filter_policy = 0x01; + } + /* When the controller is using random resolvable addresses and * with that having LE privacy enabled, then controllers with * Extended Scanner Filter Policies support can now enable support @@ -2998,6 +3019,20 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) } else if (hci_is_adv_monitoring(hdev)) { window = hdev->le_scan_window_adv_monitor; interval = hdev->le_scan_int_adv_monitor; + + /* Disable duplicates filter when scanning for advertisement + * monitor for the following reasons. + * + * For HW pattern filtering (ex. MSFT), Realtek and Qualcomm + * controllers ignore RSSI_Sampling_Period when the duplicates + * filter is enabled. + * + * For SW pattern filtering, when we're not doing interleaved + * scanning, it is necessary to disable duplicates filter, + * otherwise hosts can only receive one advertisement and it's + * impossible to know if a peer is still in range. + */ + filter_dups = LE_SCAN_FILTER_DUP_DISABLE; } else { window = hdev->le_scan_window; interval = hdev->le_scan_interval; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index c3c26bbb5ddaebd2c4faa0da887774a32688b71c..9988ba382b686ad8a59c72dcf65749138af0afb5 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6774,6 +6774,7 @@ static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm, bt_cb(skb)->l2cap.psm = psm; if (!chan->ops->recv(chan, skb)) { + l2cap_chan_unlock(chan); l2cap_chan_put(chan); return; } diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 40d4887c7f791314040a292a5f363e993e6d384e..25979f4283a6ff413a2cb49b7f8b677dba1569de 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3456,6 +3456,10 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data, * will be kept and this function does nothing. */ p = hci_conn_params_add(hdev, &cp->addr.bdaddr, addr_type); + if (!p) { + err = -EIO; + goto unlock; + } if (p->auto_connect == HCI_AUTO_CONN_EXPLICIT) p->auto_connect = HCI_AUTO_CONN_DISABLED; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 1e7ea3a4b7ef326a6b2a79d44d8d9226e391934b..4f9fdf400584e53da1a7eb889a168c63322385fe 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -914,7 +914,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth, * Confirms and the responder Enters the passkey. */ if (smp->method == OVERLAP) { - if (hcon->role == HCI_ROLE_MASTER) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp->method = CFM_PASSKEY; else smp->method = REQ_PASSKEY; @@ -964,7 +964,7 @@ static u8 smp_confirm(struct smp_chan *smp) smp_send_cmd(smp->conn, SMP_CMD_PAIRING_CONFIRM, sizeof(cp), &cp); - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); else SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -980,7 +980,8 @@ static u8 smp_random(struct smp_chan *smp) int ret; bt_dev_dbg(conn->hcon->hdev, "conn %p %s", conn, - conn->hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp, hcon->init_addr_type, &hcon->init_addr, @@ -994,7 +995,7 @@ static u8 smp_random(struct smp_chan *smp) return SMP_CONFIRM_FAILED; } - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 stk[16]; __le64 rand = 0; __le16 ediv = 0; @@ -1256,14 +1257,15 @@ static void smp_distribute_keys(struct smp_chan *smp) rsp = (void *) &smp->prsp[1]; /* The responder sends its keys first */ - if (hcon->out && (smp->remote_key_dist & KEY_DIST_MASK)) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags) && + (smp->remote_key_dist & KEY_DIST_MASK)) { smp_allow_key_dist(smp); return; } req = (void *) &smp->preq[1]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { keydist = &rsp->init_key_dist; *keydist &= req->init_key_dist; } else { @@ -1432,7 +1434,7 @@ static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) struct hci_conn *hcon = smp->conn->hcon; u8 *na, *nb, a[7], b[7]; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { na = smp->prnd; nb = smp->rrnd; } else { @@ -1460,7 +1462,7 @@ static void sc_dhkey_check(struct smp_chan *smp) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->preq[1], 3); @@ -1539,7 +1541,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) /* The round is only complete when the initiator * receives pairing random. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); if (smp->passkey_round == 20) @@ -1567,7 +1569,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); return 0; @@ -1578,7 +1580,7 @@ static u8 sc_passkey_round(struct smp_chan *smp, u8 smp_op) case SMP_CMD_PUBLIC_KEY: default: /* Initiating device starts the round */ - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; bt_dev_dbg(hdev, "Starting passkey round %u", @@ -1623,7 +1625,7 @@ static int sc_user_reply(struct smp_chan *smp, u16 mgmt_op, __le32 passkey) } /* Initiator sends DHKey check first */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } else if (test_and_clear_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags)) { @@ -1746,7 +1748,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) struct smp_cmd_pairing rsp, *req = (void *) skb->data; struct l2cap_chan *chan = conn->smp; struct hci_dev *hdev = conn->hcon->hdev; - struct smp_chan *smp; + struct smp_chan *smp = chan->data; u8 key_size, auth, sec_level; int ret; @@ -1755,16 +1757,14 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*req)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_SLAVE) + if (smp && test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; - if (!chan->data) + if (!smp) { smp = smp_chan_create(conn); - else - smp = chan->data; - - if (!smp) - return SMP_UNSPECIFIED; + if (!smp) + return SMP_UNSPECIFIED; + } /* We didn't start the pairing, so match remote */ auth = req->auth_req & AUTH_REQ_MASK(hdev); @@ -1946,7 +1946,7 @@ static u8 smp_cmd_pairing_rsp(struct l2cap_conn *conn, struct sk_buff *skb) if (skb->len < sizeof(*rsp)) return SMP_INVALID_PARAMS; - if (conn->hcon->role != HCI_ROLE_MASTER) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_CMD_NOTSUPP; skb_pull(skb, sizeof(*rsp)); @@ -2041,7 +2041,7 @@ static u8 sc_check_confirm(struct smp_chan *smp) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_CONFIRM); - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2063,7 +2063,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp) u8 auth; /* The issue is only observed when we're in responder role */ - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return SMP_UNSPECIFIED; if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { @@ -2099,7 +2099,8 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) struct hci_dev *hdev = hcon->hdev; bt_dev_dbg(hdev, "conn %p %s", conn, - hcon->out ? "initiator" : "responder"); + test_bit(SMP_FLAG_INITIATOR, &smp->flags) ? "initiator" : + "responder"); if (skb->len < sizeof(smp->pcnf)) return SMP_INVALID_PARAMS; @@ -2121,7 +2122,7 @@ static u8 smp_cmd_pairing_confirm(struct l2cap_conn *conn, struct sk_buff *skb) return ret; } - if (conn->hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RANDOM); @@ -2156,7 +2157,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (!test_bit(SMP_FLAG_SC, &smp->flags)) return smp_random(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { pkax = smp->local_pk; pkbx = smp->remote_pk; na = smp->prnd; @@ -2169,7 +2170,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (!hcon->out) + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); @@ -2180,7 +2181,7 @@ static u8 smp_cmd_pairing_random(struct l2cap_conn *conn, struct sk_buff *skb) if (smp->method == REQ_PASSKEY || smp->method == DSP_PASSKEY) return sc_passkey_round(smp, SMP_CMD_PAIRING_RANDOM); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { u8 cfm[16]; err = smp_f4(smp->tfm_cmac, smp->remote_pk, smp->local_pk, @@ -2221,7 +2222,7 @@ mackey_and_ltk: return SMP_UNSPECIFIED; if (smp->method == REQ_OOB) { - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { sc_dhkey_check(smp); SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK); } @@ -2295,10 +2296,27 @@ bool smp_sufficient_security(struct hci_conn *hcon, u8 sec_level, return false; } +static void smp_send_pairing_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_pairing cp; + + if (smp->conn->hcon->type == ACL_LINK) + build_bredr_pairing_cmd(smp, &cp, NULL); + else + build_pairing_cmd(smp->conn, &cp, NULL, auth); + + smp->preq[0] = SMP_CMD_PAIRING_REQ; + memcpy(&smp->preq[1], &cp, sizeof(cp)); + + smp_send_cmd(smp->conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + + set_bit(SMP_FLAG_INITIATOR, &smp->flags); +} + static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) { struct smp_cmd_security_req *rp = (void *) skb->data; - struct smp_cmd_pairing cp; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; struct smp_chan *smp; @@ -2347,16 +2365,20 @@ static u8 smp_cmd_security_req(struct l2cap_conn *conn, struct sk_buff *skb) skb_pull(skb, sizeof(*rp)); - memset(&cp, 0, sizeof(cp)); - build_pairing_cmd(conn, &cp, NULL, auth); + smp_send_pairing_req(smp, auth); - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); + return 0; +} - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); +static void smp_send_security_req(struct smp_chan *smp, __u8 auth) +{ + struct smp_cmd_security_req cp; - return 0; + cp.auth_req = auth; + smp_send_cmd(smp->conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); + SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); + + clear_bit(SMP_FLAG_INITIATOR, &smp->flags); } int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) @@ -2427,23 +2449,11 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) authreq |= SMP_AUTH_MITM; } - if (hcon->role == HCI_ROLE_MASTER) { - struct smp_cmd_pairing cp; - - build_pairing_cmd(conn, &cp, NULL, authreq); - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &cp, sizeof(cp)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); - } else { - struct smp_cmd_security_req cp; - cp.auth_req = authreq; - smp_send_cmd(conn, SMP_CMD_SECURITY_REQ, sizeof(cp), &cp); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_REQ); - } + if (hcon->role == HCI_ROLE_MASTER) + smp_send_pairing_req(smp, authreq); + else + smp_send_security_req(smp, authreq); - set_bit(SMP_FLAG_INITIATOR, &smp->flags); ret = 0; unlock: @@ -2694,8 +2704,6 @@ static int smp_cmd_sign_info(struct l2cap_conn *conn, struct sk_buff *skb) static u8 sc_select_method(struct smp_chan *smp) { - struct l2cap_conn *conn = smp->conn; - struct hci_conn *hcon = conn->hcon; struct smp_cmd_pairing *local, *remote; u8 local_mitm, remote_mitm, local_io, remote_io, method; @@ -2708,7 +2716,7 @@ static u8 sc_select_method(struct smp_chan *smp) * the "struct smp_cmd_pairing" from them we need to skip the * first byte which contains the opcode. */ - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local = (void *) &smp->preq[1]; remote = (void *) &smp->prsp[1]; } else { @@ -2777,7 +2785,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* Non-initiating device sends its public key after receiving * the key from the initiating device. */ - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { err = sc_send_public_key(smp); if (err) return err; @@ -2839,7 +2847,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) } if (smp->method == REQ_OOB) { - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd), smp->prnd); @@ -2848,7 +2856,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) return 0; } - if (hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_CONFIRM); if (smp->method == REQ_PASSKEY) { @@ -2863,7 +2871,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) /* The Initiating device waits for the non-initiating device to * send the confirm value. */ - if (conn->hcon->out) + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) return 0; err = smp_f4(smp->tfm_cmac, smp->local_pk, smp->remote_pk, smp->prnd, @@ -2897,7 +2905,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) a[6] = hcon->init_addr_type; b[6] = hcon->resp_addr_type; - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { local_addr = a; remote_addr = b; memcpy(io_cap, &smp->prsp[1], 3); @@ -2922,7 +2930,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) if (crypto_memneq(check->e, e, 16)) return SMP_DHKEY_CHECK_FAILED; - if (!hcon->out) { + if (!test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { if (test_bit(SMP_FLAG_WAIT_USER, &smp->flags)) { set_bit(SMP_FLAG_DHKEY_PENDING, &smp->flags); return 0; @@ -2934,7 +2942,7 @@ static int smp_cmd_dhkey_check(struct l2cap_conn *conn, struct sk_buff *skb) sc_add_ltk(smp); - if (hcon->out) { + if (test_bit(SMP_FLAG_INITIATOR, &smp->flags)) { hci_le_start_enc(hcon, 0, 0, smp->tk, smp->enc_key_size); hcon->enc_key_size = smp->enc_key_size; } @@ -3083,7 +3091,6 @@ static void bredr_pairing(struct l2cap_chan *chan) struct l2cap_conn *conn = chan->conn; struct hci_conn *hcon = conn->hcon; struct hci_dev *hdev = hcon->hdev; - struct smp_cmd_pairing req; struct smp_chan *smp; bt_dev_dbg(hdev, "chan %p", chan); @@ -3135,14 +3142,7 @@ static void bredr_pairing(struct l2cap_chan *chan) bt_dev_dbg(hdev, "starting SMP over BR/EDR"); - /* Prepare and send the BR/EDR SMP Pairing Request */ - build_bredr_pairing_cmd(smp, &req, NULL); - - smp->preq[0] = SMP_CMD_PAIRING_REQ; - memcpy(&smp->preq[1], &req, sizeof(req)); - - smp_send_cmd(conn, SMP_CMD_PAIRING_REQ, sizeof(req), &req); - SMP_ALLOW_CMD(smp, SMP_CMD_PAIRING_RSP); + smp_send_pairing_req(smp, 0x00); } static void smp_resume_cb(struct l2cap_chan *chan) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9a1cb5079a7a0758b241f8425ea4153045e72d07..b2ae0d2434d2ebb143ff9005aef033b40bde5e2f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -2045,16 +2045,14 @@ void br_multicast_del_port(struct net_bridge_port *port) { struct net_bridge *br = port->br; struct net_bridge_port_group *pg; - HLIST_HEAD(deleted_head); struct hlist_node *n; /* Take care of the remaining groups, only perm ones should be left */ spin_lock_bh(&br->multicast_lock); hlist_for_each_entry_safe(pg, n, &port->mglist, mglist) br_multicast_find_del_pg(br, pg); - hlist_move_list(&br->mcast_gc_list, &deleted_head); spin_unlock_bh(&br->multicast_lock); - br_multicast_gc(&deleted_head); + flush_work(&br->mcast_gc_work); br_multicast_port_ctx_deinit(&port->multicast_ctx); free_percpu(port->mcast_stats); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 09f6a773a7080a48ab074bdff185cf30149ff2d9..8f9c19d992ac5ce333f8d59ff7628a2335f8cdfc 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -622,8 +622,12 @@ static unsigned int br_nf_local_in(void *priv, if (likely(nf_ct_is_confirmed(ct))) return NF_ACCEPT; + if (WARN_ON_ONCE(refcount_read(&nfct->use) != 1)) { + nf_reset_ct(skb); + return NF_ACCEPT; + } + WARN_ON_ONCE(skb_shared(skb)); - WARN_ON_ONCE(refcount_read(&nfct->use) != 1); /* We can't call nf_confirm here, it would create a dependency * on nf_conntrack module. diff --git a/net/core/dev.c b/net/core/dev.c index 6ea1d20676fbe9da53c1b7491fc87202bb100649..f66e614078832842e903fc2ee792cd89b8a3572f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5150,6 +5150,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff **pskb) bpf_net_ctx_clear(bpf_net_ctx); return XDP_DROP; } + bpf_net_ctx_clear(bpf_net_ctx); } return XDP_PASS; out_redir: @@ -9911,6 +9912,15 @@ static void netdev_sync_lower_features(struct net_device *upper, } } +static bool netdev_has_ip_or_hw_csum(netdev_features_t features) +{ + netdev_features_t ip_csum_mask = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; + bool ip_csum = (features & ip_csum_mask) == ip_csum_mask; + bool hw_csum = features & NETIF_F_HW_CSUM; + + return ip_csum || hw_csum; +} + static netdev_features_t netdev_fix_features(struct net_device *dev, netdev_features_t features) { @@ -9992,15 +10002,9 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_LRO; } - if (features & NETIF_F_HW_TLS_TX) { - bool ip_csum = (features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) == - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - bool hw_csum = features & NETIF_F_HW_CSUM; - - if (!ip_csum && !hw_csum) { - netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); - features &= ~NETIF_F_HW_TLS_TX; - } + if ((features & NETIF_F_HW_TLS_TX) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping TLS TX HW offload feature since no CSUM feature.\n"); + features &= ~NETIF_F_HW_TLS_TX; } if ((features & NETIF_F_HW_TLS_RX) && !(features & NETIF_F_RXCSUM)) { @@ -10008,6 +10012,11 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_HW_TLS_RX; } + if ((features & NETIF_F_GSO_UDP_L4) && !netdev_has_ip_or_hw_csum(features)) { + netdev_dbg(dev, "Dropping USO feature since no CSUM feature.\n"); + features &= ~NETIF_F_GSO_UDP_L4; + } + return features; } diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 8ec35194bfcb8574f53a9fd28f0cb2ebfe9a3f2e..ab150641142aa1545c71fc5d3b11db33c70cf437 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -148,9 +148,9 @@ static void linkwatch_schedule_work(int urgent) * override the existing timer. */ if (test_bit(LW_URGENT, &linkwatch_flags)) - mod_delayed_work(system_wq, &linkwatch_work, 0); + mod_delayed_work(system_unbound_wq, &linkwatch_work, 0); else - schedule_delayed_work(&linkwatch_work, delay); + queue_delayed_work(system_unbound_wq, &linkwatch_work, delay); } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 55bcacf67df3b672280c905c3572cd86d773fa3d..d657b042d5a048d98ed123acab62182e6f7acba9 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,7 +228,6 @@ void netpoll_poll_disable(struct net_device *dev) down(&ni->dev_lock); srcu_read_unlock(&netpoll_srcu, idx); } -EXPORT_SYMBOL(netpoll_poll_disable); void netpoll_poll_enable(struct net_device *dev) { @@ -239,7 +238,6 @@ void netpoll_poll_enable(struct net_device *dev) up(&ni->dev_lock); rcu_read_unlock(); } -EXPORT_SYMBOL(netpoll_poll_enable); static void refill_skbs(void) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 87e67194f24046a8420bbb51c19fb0a686b9b06b..73fd7f543fd0954ba3f4a2291675047831970024 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3288,7 +3288,7 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (ifm->ifi_index > 0) dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME] || tb[IFLA_ALT_IFNAME]) - dev = rtnl_dev_get(net, tb); + dev = rtnl_dev_get(tgt_net, tb); else if (tb[IFLA_GROUP]) err = rtnl_group_dellink(tgt_net, nla_get_u32(tb[IFLA_GROUP])); else diff --git a/net/dsa/tag.c b/net/dsa/tag.c index 6e402d49afd3e8ccf9034c8300963cc54f01494d..79ad105902d9780de56cfac7852abbd80829ffa8 100644 --- a/net/dsa/tag.c +++ b/net/dsa/tag.c @@ -105,8 +105,9 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, p = netdev_priv(skb->dev); - if (unlikely(cpu_dp->ds->untag_bridge_pvid)) { - nskb = dsa_untag_bridge_pvid(skb); + if (unlikely(cpu_dp->ds->untag_bridge_pvid || + cpu_dp->ds->untag_vlan_aware_bridge_pvid)) { + nskb = dsa_software_vlan_untag(skb); if (!nskb) { kfree_skb(skb); return 0; diff --git a/net/dsa/tag.h b/net/dsa/tag.h index f6b9c73718dfa2f1a873d7ed0ca0bff0904bd413..d5707870906bc93ad8d5b66d4eb85530d55ab01a 100644 --- a/net/dsa/tag.h +++ b/net/dsa/tag.h @@ -44,46 +44,81 @@ static inline struct net_device *dsa_conduit_find_user(struct net_device *dev, return NULL; } -/* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged - * frames as untagged, since the bridge will not untag them. +/** + * dsa_software_untag_vlan_aware_bridge: Software untagging for VLAN-aware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge can process tagged packets. Software like STP/PTP may not. The + * bridge can also process untagged packets, to the same effect as if they were + * tagged with the PVID of the ingress port. So packets tagged with the PVID of + * the bridge port must be software-untagged, to support both use cases. */ -static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) +static inline void dsa_software_untag_vlan_aware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) { - struct dsa_port *dp = dsa_user_to_port(skb->dev); - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct net_device *dev = skb->dev; - struct net_device *upper_dev; - u16 vid, pvid, proto; + u16 pvid, proto; int err; - if (!br || br_vlan_enabled(br)) - return skb; - err = br_vlan_get_proto(br, &proto); if (err) - return skb; + return; - /* Move VLAN tag from data to hwaccel */ - if (!skb_vlan_tag_present(skb) && skb->protocol == htons(proto)) { - skb = skb_vlan_untag(skb); - if (!skb) - return NULL; - } + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); + if (err) + return; - if (!skb_vlan_tag_present(skb)) - return skb; + if (vid == pvid && skb->vlan_proto == htons(proto)) + __vlan_hwaccel_clear_tag(skb); +} - vid = skb_vlan_tag_get_id(skb); +/** + * dsa_software_untag_vlan_unaware_bridge: Software untagging for VLAN-unaware bridge + * @skb: Pointer to received socket buffer (packet) + * @br: Pointer to bridge upper interface of ingress port + * @vid: Parsed VID from packet + * + * The bridge ignores all VLAN tags. Software like STP/PTP may not (it may run + * on the plain port, or on a VLAN upper interface). Maybe packets are coming + * to software as tagged with a driver-defined VID which is NOT equal to the + * PVID of the bridge port (since the bridge is VLAN-unaware, its configuration + * should NOT be committed to hardware). DSA needs a method for this private + * VID to be communicated by software to it, and if packets are tagged with it, + * software-untag them. Note: the private VID may be different per bridge, to + * support the FDB isolation use case. + * + * FIXME: this is currently implemented based on the broken assumption that + * the "private VID" used by the driver in VLAN-unaware mode is equal to the + * bridge PVID. It should not be, except for a coincidence; the bridge PVID is + * irrelevant to the data path in the VLAN-unaware mode. Thus, the VID that + * this function removes is wrong. + * + * All users of ds->untag_bridge_pvid should fix their drivers, if necessary, + * to make the two independent. Only then, if there still remains a need to + * strip the private VID from packets, then a new ds->ops->get_private_vid() + * API shall be introduced to communicate to DSA what this VID is, which needs + * to be stripped here. + */ +static inline void dsa_software_untag_vlan_unaware_bridge(struct sk_buff *skb, + struct net_device *br, + u16 vid) +{ + struct net_device *upper_dev; + u16 pvid, proto; + int err; - /* We already run under an RCU read-side critical section since - * we are called from netif_receive_skb_list_internal(). - */ - err = br_vlan_get_pvid_rcu(dev, &pvid); + err = br_vlan_get_proto(br, &proto); if (err) - return skb; + return; - if (vid != pvid) - return skb; + err = br_vlan_get_pvid_rcu(skb->dev, &pvid); + if (err) + return; + + if (vid != pvid || skb->vlan_proto != htons(proto)) + return; /* The sad part about attempting to untag from DSA is that we * don't know, unless we check, if the skb will end up in @@ -95,10 +130,50 @@ static inline struct sk_buff *dsa_untag_bridge_pvid(struct sk_buff *skb) * definitely keep the tag, to make sure it keeps working. */ upper_dev = __vlan_find_dev_deep_rcu(br, htons(proto), vid); - if (upper_dev) + if (!upper_dev) + __vlan_hwaccel_clear_tag(skb); +} + +/** + * dsa_software_vlan_untag: Software VLAN untagging in DSA receive path + * @skb: Pointer to socket buffer (packet) + * + * Receive path method for switches which cannot avoid tagging all packets + * towards the CPU port. Called when ds->untag_bridge_pvid (legacy) or + * ds->untag_vlan_aware_bridge_pvid is set to true. + * + * As a side effect of this method, any VLAN tag from the skb head is moved + * to hwaccel. + */ +static inline struct sk_buff *dsa_software_vlan_untag(struct sk_buff *skb) +{ + struct dsa_port *dp = dsa_user_to_port(skb->dev); + struct net_device *br = dsa_port_bridge_dev_get(dp); + u16 vid; + + /* software untagging for standalone ports not yet necessary */ + if (!br) return skb; - __vlan_hwaccel_clear_tag(skb); + /* Move VLAN tag from data to hwaccel */ + if (!skb_vlan_tag_present(skb)) { + skb = skb_vlan_untag(skb); + if (!skb) + return NULL; + } + + if (!skb_vlan_tag_present(skb)) + return skb; + + vid = skb_vlan_tag_get_id(skb); + + if (br_vlan_enabled(br)) { + if (dp->ds->untag_vlan_aware_bridge_pvid) + dsa_software_untag_vlan_aware_bridge(skb, br, vid); + } else { + if (dp->ds->untag_bridge_pvid) + dsa_software_untag_vlan_unaware_bridge(skb, br, vid); + } return skb; } diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index e0e4300bfbd3fcb2404a7dd0aca617c3d5004454..bf6608fc6be70e8a1aa9dc12d79371c17b147503 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -8,40 +8,6 @@ #define OCELOT_NAME "ocelot" #define SEVILLE_NAME "seville" -/* If the port is under a VLAN-aware bridge, remove the VLAN header from the - * payload and move it into the DSA tag, which will make the switch classify - * the packet to the bridge VLAN. Otherwise, leave the classified VLAN at zero, - * which is the pvid of standalone and VLAN-unaware bridge ports. - */ -static void ocelot_xmit_get_vlan_info(struct sk_buff *skb, struct dsa_port *dp, - u64 *vlan_tci, u64 *tag_type) -{ - struct net_device *br = dsa_port_bridge_dev_get(dp); - struct vlan_ethhdr *hdr; - u16 proto, tci; - - if (!br || !br_vlan_enabled(br)) { - *vlan_tci = 0; - *tag_type = IFH_TAG_TYPE_C; - return; - } - - hdr = skb_vlan_eth_hdr(skb); - br_vlan_get_proto(br, &proto); - - if (ntohs(hdr->h_vlan_proto) == proto) { - vlan_remove_tag(skb, &tci); - *vlan_tci = tci; - } else { - rcu_read_lock(); - br_vlan_get_pvid_rcu(br, &tci); - rcu_read_unlock(); - *vlan_tci = tci; - } - - *tag_type = (proto != ETH_P_8021Q) ? IFH_TAG_TYPE_S : IFH_TAG_TYPE_C; -} - static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, __be32 ifh_prefix, void **ifh) { @@ -53,7 +19,8 @@ static void ocelot_xmit_common(struct sk_buff *skb, struct net_device *netdev, u32 rew_op = 0; u64 qos_class; - ocelot_xmit_get_vlan_info(skb, dp, &vlan_tci, &tag_type); + ocelot_xmit_get_vlan_info(skb, dsa_port_bridge_dev_get(dp), &vlan_tci, + &tag_type); qos_class = netdev_get_num_tc(netdev) ? netdev_get_prio_tc_map(netdev, skb->priority) : skb->priority; diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c index ae4b4b28a6014f5b18cce788a14b0e8d78224084..655ff5224ffa30ff721998f38e2b22b1545c68fa 100644 --- a/net/ethtool/cmis_fw_update.c +++ b/net/ethtool/cmis_fw_update.c @@ -35,7 +35,10 @@ struct cmis_cdb_fw_mng_features_rpl { __be16 resv7; }; -#define CMIS_CDB_FW_WRITE_MECHANISM_LPL 0x01 +enum cmis_cdb_fw_write_mechanism { + CMIS_CDB_FW_WRITE_MECHANISM_LPL = 0x01, + CMIS_CDB_FW_WRITE_MECHANISM_BOTH = 0x11, +}; static int cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, @@ -64,7 +67,8 @@ cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, } rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload; - if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL)) { + if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL || + rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_BOTH)) { ethnl_module_fw_flash_ntf_err(dev, ntf_params, "Write LPL is not supported", NULL); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 983fee76f5cf15e6b9142e32c632b175d6a8df5e..e18823bf23306dffd32ddc752458e3c350a29b42 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1331,13 +1331,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, u32 rss_cfg_offset = offsetof(struct ethtool_rxfh, rss_config[0]); const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; + u32 user_indir_len = 0, indir_bytes = 0; struct ethtool_rxfh_param rxfh_dev = {}; struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; bool locked = false; /* dev->ethtool->rss_lock taken */ - u32 indir_bytes = 0; bool create = false; u8 *rss_config; int ret; @@ -1369,23 +1369,25 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EOPNOTSUPP; create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; - /* If either indir, hash key or function is valid, proceed further. - * Must request at least one change: indir size, hash key, function - * or input transformation. - */ if ((rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE && rxfh.indir_size != dev_indir_size) || - (rxfh.key_size && (rxfh.key_size != dev_key_size)) || + (rxfh.key_size && rxfh.key_size != dev_key_size)) + return -EINVAL; + + /* Must request at least one change: indir size, hash key, function + * or input transformation. + * There's no need for any of it in case of context creation. + */ + if (!create && (rxfh.indir_size == ETH_RXFH_INDIR_NO_CHANGE && rxfh.key_size == 0 && rxfh.hfunc == ETH_RSS_HASH_NO_CHANGE && rxfh.input_xfrm == RXH_XFRM_NO_CHANGE)) return -EINVAL; - if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); + indir_bytes = dev_indir_size * sizeof(rxfh_dev.indir[0]); - rss_config = kzalloc(indir_bytes + rxfh.key_size, GFP_USER); + rss_config = kzalloc(indir_bytes + dev_key_size, GFP_USER); if (!rss_config) return -ENOMEM; @@ -1400,6 +1402,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, */ if (rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) { + user_indir_len = indir_bytes; rxfh_dev.indir = (u32 *)rss_config; rxfh_dev.indir_size = dev_indir_size; ret = ethtool_copy_validate_indir(rxfh_dev.indir, @@ -1426,7 +1429,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.key_size = dev_key_size; rxfh_dev.key = rss_config + indir_bytes; if (copy_from_user(rxfh_dev.key, - useraddr + rss_cfg_offset + indir_bytes, + useraddr + rss_cfg_offset + user_indir_len, rxfh.key_size)) { ret = -EFAULT; goto out; @@ -1449,12 +1452,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } if (ops->create_rxfh_context) { - u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 limit = ops->rxfh_max_num_contexts ?: U32_MAX; u32 ctx_id; /* driver uses new API, core allocates ID */ ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, - XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + XA_LIMIT(1, limit - 1), + GFP_KERNEL_ACCOUNT); if (ret < 0) { kfree(ctx); goto out; @@ -1474,16 +1478,21 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.input_xfrm = rxfh.input_xfrm; if (rxfh.rss_context && ops->create_rxfh_context) { - if (create) + if (create) { ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, extack); - else if (rxfh_dev.rss_delete) + /* Make sure driver populates defaults */ + WARN_ON_ONCE(!ret && !rxfh_dev.key && + !memchr_inv(ethtool_rxfh_context_key(ctx), + 0, ctx->key_size)); + } else if (rxfh_dev.rss_delete) { ret = ops->remove_rxfh_context(dev, ctx, rxfh.rss_context, extack); - else + } else { ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, extack); + } } else { ret = ops->set_rxfh(dev, &rxfh_dev, extack); } @@ -1522,6 +1531,22 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, kfree(ctx); goto out; } + + /* Fetch the defaults for the old API, in the new API drivers + * should write defaults into ctx themselves. + */ + rxfh_dev.indir = (u32 *)rss_config; + rxfh_dev.indir_size = dev_indir_size; + + rxfh_dev.key = rss_config + indir_bytes; + rxfh_dev.key_size = dev_key_size; + + ret = ops->get_rxfh(dev, &rxfh_dev); + if (WARN_ON(ret)) { + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + kfree(ctx); + goto out; + } } if (rxfh_dev.rss_delete) { WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); @@ -1530,12 +1555,14 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (rxfh_dev.indir) { for (i = 0; i < dev_indir_size; i++) ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; - ctx->indir_configured = 1; + ctx->indir_configured = + rxfh.indir_size && + rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; } if (rxfh_dev.key) { memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, dev_key_size); - ctx->key_configured = 1; + ctx->key_configured = !!rxfh.key_size; } if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) ctx->hfunc = rxfh_dev.hfunc; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 71679137eff21820a88c25dd75a7bf115e899154..5c4c4505ab9a4a4497e6cfd669e3506c95511c81 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -111,7 +111,8 @@ rss_reply_size(const struct ethnl_req_info *req_base, const struct rss_reply_data *data = RSS_REPDATA(reply_base); int len; - len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ + len = nla_total_size(sizeof(u32)) + /* _RSS_CONTEXT */ + nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ nla_total_size(data->hkey_size); /* _RSS_HKEY */ @@ -124,6 +125,11 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, const struct ethnl_reply_data *reply_base) { const struct rss_reply_data *data = RSS_REPDATA(reply_base); + struct rss_req_info *request = RSS_REQINFO(req_base); + + if (request->rss_context && + nla_put_u32(skb, ETHTOOL_A_RSS_CONTEXT, request->rss_context)) + return -EMSGSIZE; if ((data->hfunc && nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 4d42d0756fd70c980dac9620b1d778c6721ee28d..a5db7c67d61befe2a064c82051ff8ac7d65ac590 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -145,25 +145,27 @@ static struct pernet_operations iptable_nat_net_ops = { static int __init iptable_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv4_table, - iptable_nat_table_init); + int ret; + /* net->gen->ptr[iptable_nat_net_id] must be allocated + * before calling iptable_nat_table_init(). + */ + ret = register_pernet_subsys(&iptable_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&iptable_nat_net_ops); - if (ret < 0) { - xt_unregister_template(&nf_nat_ipv4_table); - return ret; - } + ret = xt_register_template(&nf_nat_ipv4_table, + iptable_nat_table_init); + if (ret < 0) + unregister_pernet_subsys(&iptable_nat_net_ops); return ret; } static void __exit iptable_nat_exit(void) { - unregister_pernet_subsys(&iptable_nat_net_ops); xt_unregister_template(&nf_nat_ipv4_table); + unregister_pernet_subsys(&iptable_nat_net_ops); } module_init(iptable_nat_init); diff --git a/net/ipv4/tcp_ao.c b/net/ipv4/tcp_ao.c index 85531437890cee662a2b63c2fbfdae73d6285642..db6516092daf5b180fb75482fb711f226451a647 100644 --- a/net/ipv4/tcp_ao.c +++ b/net/ipv4/tcp_ao.c @@ -267,32 +267,49 @@ static void tcp_ao_key_free_rcu(struct rcu_head *head) kfree_sensitive(key); } -void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +static void tcp_ao_info_free_rcu(struct rcu_head *head) { - struct tcp_ao_info *ao; + struct tcp_ao_info *ao = container_of(head, struct tcp_ao_info, rcu); struct tcp_ao_key *key; struct hlist_node *n; + hlist_for_each_entry_safe(key, n, &ao->head, node) { + hlist_del(&key->node); + tcp_sigpool_release(key->tcp_sigpool_id); + kfree_sensitive(key); + } + kfree(ao); + static_branch_slow_dec_deferred(&tcp_ao_needed); +} + +static void tcp_ao_sk_omem_free(struct sock *sk, struct tcp_ao_info *ao) +{ + size_t total_ao_sk_mem = 0; + struct tcp_ao_key *key; + + hlist_for_each_entry(key, &ao->head, node) + total_ao_sk_mem += tcp_ao_sizeof_key(key); + atomic_sub(total_ao_sk_mem, &sk->sk_omem_alloc); +} + +void tcp_ao_destroy_sock(struct sock *sk, bool twsk) +{ + struct tcp_ao_info *ao; + if (twsk) { ao = rcu_dereference_protected(tcp_twsk(sk)->ao_info, 1); - tcp_twsk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_twsk(sk)->ao_info, NULL); } else { ao = rcu_dereference_protected(tcp_sk(sk)->ao_info, 1); - tcp_sk(sk)->ao_info = NULL; + rcu_assign_pointer(tcp_sk(sk)->ao_info, NULL); } if (!ao || !refcount_dec_and_test(&ao->refcnt)) return; - hlist_for_each_entry_safe(key, n, &ao->head, node) { - hlist_del_rcu(&key->node); - if (!twsk) - atomic_sub(tcp_ao_sizeof_key(key), &sk->sk_omem_alloc); - call_rcu(&key->rcu, tcp_ao_key_free_rcu); - } - - kfree_rcu(ao, rcu); - static_branch_slow_dec_deferred(&tcp_ao_needed); + if (!twsk) + tcp_ao_sk_omem_free(sk, ao); + call_rcu(&ao->rcu, tcp_ao_info_free_rcu); } void tcp_ao_time_wait(struct tcp_timewait_sock *tcptw, struct tcp_sock *tp) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 454362e359da56c83a00a2523ca09c61c563608e..e37488d3453f03b68928718a69559581b436812b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -238,9 +238,14 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb) */ if (unlikely(len != icsk->icsk_ack.rcv_mss)) { u64 val = (u64)skb->len << TCP_RMEM_TO_WIN_SCALE; + u8 old_ratio = tcp_sk(sk)->scaling_ratio; do_div(val, skb->truesize); tcp_sk(sk)->scaling_ratio = val ? val : 1; + + if (old_ratio != tcp_sk(sk)->scaling_ratio) + WRITE_ONCE(tcp_sk(sk)->window_clamp, + tcp_win_from_space(sk, sk->sk_rcvbuf)); } icsk->icsk_ack.rcv_mss = min_t(unsigned int, len, tcp_sk(sk)->advmss); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index fd17f25ff288a47fca3ec1881c87d56bd9989709..a4e510846905e662f4c7bb5b7e2508ca4dc77d36 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -97,6 +97,8 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; +static DEFINE_MUTEX(tcp_exit_batch_mutex); + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -3514,6 +3516,16 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { struct net *net; + /* make sure concurrent calls to tcp_sk_exit_batch from net_cleanup_work + * and failed setup_net error unwinding path are serialized. + * + * tcp_twsk_purge() handles twsk in any dead netns, not just those in + * net_exit_list, the thread that dismantles a particular twsk must + * do so without other thread progressing to refcount_dec_and_test() of + * tcp_death_row.tw_refcount. + */ + mutex_lock(&tcp_exit_batch_mutex); + tcp_twsk_purge(net_exit_list); list_for_each_entry(net, net_exit_list, exit_list) { @@ -3521,6 +3533,8 @@ static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); } + + mutex_unlock(&tcp_exit_batch_mutex); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 4b791e74529e157ead943e4faa5b3b90869c7273..e4ad3311e14895ab898a653c3a29cd8af3e42dff 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -140,6 +140,9 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, if (thlen < sizeof(*th)) goto out; + if (unlikely(skb_checksum_start(skb) != skb_transport_header(skb))) + goto out; + if (!pskb_may_pull(skb, thlen)) goto out; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index aa2e0a28ca61388637616b2860da12e6d144e6dd..d842303587af93fc58454c0a2da1b6f70c55eec8 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -278,6 +278,17 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, if (gso_skb->len <= sizeof(*uh) + mss) return ERR_PTR(-EINVAL); + if (unlikely(skb_checksum_start(gso_skb) != + skb_transport_header(gso_skb) && + !(skb_shinfo(gso_skb)->gso_type & SKB_GSO_FRAGLIST))) + return ERR_PTR(-EINVAL); + + /* We don't know if egress device can segment and checksum the packet + * when IPv6 extension headers are present. Fall back to software GSO. + */ + if (gso_skb->ip_summed != CHECKSUM_PARTIAL) + features &= ~(NETIF_F_GSO_UDP_L4 | NETIF_F_CSUM_MASK); + if (skb_gso_ok(gso_skb, features | NETIF_F_GSO_ROBUST)) { /* Packet is from an untrusted source, reset gso_segs. */ skb_shinfo(gso_skb)->gso_segs = DIV_ROUND_UP(gso_skb->len - sizeof(*uh), diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ab504d31f0cdd8dec9ab01bf9d6e6517307609cd..f26841f1490f5c31e3b09bce5773391e779128e1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -70,11 +70,15 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * /* Be paranoid, rather than too clever. */ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, hh_len); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOMEM; } + rcu_read_unlock(); } hdr = ipv6_hdr(skb); @@ -283,11 +287,15 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, head_room += opt->opt_nflen + opt->opt_flen; if (unlikely(head_room > skb_headroom(skb))) { + /* Make sure idev stays alive */ + rcu_read_lock(); skb = skb_expand_head(skb, head_room); if (!skb) { IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + rcu_read_unlock(); return -ENOBUFS; } + rcu_read_unlock(); } if (opt) { @@ -1956,6 +1964,7 @@ int ip6_send_skb(struct sk_buff *skb) struct rt6_info *rt = dst_rt6_info(skb_dst(skb)); int err; + rcu_read_lock(); err = ip6_local_out(net, skb->sk, skb); if (err) { if (err > 0) @@ -1965,6 +1974,7 @@ int ip6_send_skb(struct sk_buff *skb) IPSTATS_MIB_OUTDISCARDS); } + rcu_read_unlock(); return err; } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9dee0c12795540df27edfa7d19b797fe0ef0e855..87dfb565a9f81b663a06f07df63d2604c9a85cac 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1507,7 +1507,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) tdev = __dev_get_by_index(t->net, p->link); if (tdev) { - dev->hard_header_len = tdev->hard_header_len + t_hlen; + dev->needed_headroom = tdev->hard_header_len + + tdev->needed_headroom + t_hlen; mtu = min_t(unsigned int, tdev->mtu, IP6_MAX_MTU); mtu = mtu - t_hlen; @@ -1731,7 +1732,9 @@ ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr, int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) { struct ip6_tnl *tnl = netdev_priv(dev); + int t_hlen; + t_hlen = tnl->hlen + sizeof(struct ipv6hdr); if (tnl->parms.proto == IPPROTO_IPV6) { if (new_mtu < IPV6_MIN_MTU) return -EINVAL; @@ -1740,10 +1743,10 @@ int ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return -EINVAL; } if (tnl->parms.proto == IPPROTO_IPV6 || tnl->parms.proto == 0) { - if (new_mtu > IP6_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP6_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } else { - if (new_mtu > IP_MAX_MTU - dev->hard_header_len) + if (new_mtu > IP_MAX_MTU - dev->hard_header_len - t_hlen) return -EINVAL; } WRITE_ONCE(dev->mtu, new_mtu); @@ -1887,12 +1890,11 @@ ip6_tnl_dev_init_gen(struct net_device *dev) t_hlen = t->hlen + sizeof(struct ipv6hdr); dev->type = ARPHRD_TUNNEL6; - dev->hard_header_len = LL_MAX_HEADER + t_hlen; dev->mtu = ETH_DATA_LEN - t_hlen; if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->min_mtu = ETH_MIN_MTU; - dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len; + dev->max_mtu = IP6_MAX_MTU - dev->hard_header_len - t_hlen; netdev_hold(dev, &t->dev_tracker, GFP_KERNEL); netdev_lockdep_set_classes(dev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 70a0b2ad6bd70c2d5152ab59d2834a63587e6652..b8eec1b6cc2c88bfbf66fec165a6a8782ffd8242 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -227,6 +227,7 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, return NULL; memset(ndopts, 0, sizeof(*ndopts)); while (opt_len) { + bool unknown = false; int l; if (opt_len < sizeof(struct nd_opt_hdr)) return NULL; @@ -262,22 +263,23 @@ struct ndisc_options *ndisc_parse_options(const struct net_device *dev, break; #endif default: - if (ndisc_is_useropt(dev, nd_opt)) { - ndopts->nd_useropts_end = nd_opt; - if (!ndopts->nd_useropts) - ndopts->nd_useropts = nd_opt; - } else { - /* - * Unknown options must be silently ignored, - * to accommodate future extension to the - * protocol. - */ - ND_PRINTK(2, notice, - "%s: ignored unsupported option; type=%d, len=%d\n", - __func__, - nd_opt->nd_opt_type, - nd_opt->nd_opt_len); - } + unknown = true; + } + if (ndisc_is_useropt(dev, nd_opt)) { + ndopts->nd_useropts_end = nd_opt; + if (!ndopts->nd_useropts) + ndopts->nd_useropts = nd_opt; + } else if (unknown) { + /* + * Unknown options must be silently ignored, + * to accommodate future extension to the + * protocol. + */ + ND_PRINTK(2, notice, + "%s: ignored unsupported option; type=%d, len=%d\n", + __func__, + nd_opt->nd_opt_type, + nd_opt->nd_opt_len); } next_opt: opt_len -= l; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 52cf104e347881f052f1d515e78f05815bf7f45b..e119d4f090cc88d91a81a73be40c76d71e948018 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -147,23 +147,27 @@ static struct pernet_operations ip6table_nat_net_ops = { static int __init ip6table_nat_init(void) { - int ret = xt_register_template(&nf_nat_ipv6_table, - ip6table_nat_table_init); + int ret; + /* net->gen->ptr[ip6table_nat_net_id] must be allocated + * before calling ip6t_nat_register_lookups(). + */ + ret = register_pernet_subsys(&ip6table_nat_net_ops); if (ret < 0) return ret; - ret = register_pernet_subsys(&ip6table_nat_net_ops); + ret = xt_register_template(&nf_nat_ipv6_table, + ip6table_nat_table_init); if (ret) - xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); return ret; } static void __exit ip6table_nat_exit(void) { - unregister_pernet_subsys(&ip6table_nat_net_ops); xt_unregister_template(&nf_nat_ipv6_table); + unregister_pernet_subsys(&ip6table_nat_net_ops); } module_init(ip6table_nat_init); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f0844c9315d1819c0b1c48dd767148fca244b81..4120e67a8ce6bb44e3396ec6f66e564aae97aeec 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -154,6 +154,10 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user, }; struct inet_frag_queue *q; + if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST | + IPV6_ADDR_LINKLOCAL))) + key.iif = 0; + q = inet_frag_find(nf_frag->fqdir, &key); if (!q) return NULL; diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index c3b0b610b0aa35e9afbd74148efc6e8bb63b2822..c00323fa9eb66ed71eddf7792773b8a028b33b74 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -335,8 +335,8 @@ static void iucv_sever_path(struct sock *sk, int with_user_data) struct iucv_sock *iucv = iucv_sk(sk); struct iucv_path *path = iucv->path; - if (iucv->path) { - iucv->path = NULL; + /* Whoever resets the path pointer, must sever and free it. */ + if (xchg(&iucv->path, NULL)) { if (with_user_data) { low_nmcpy(user_data, iucv->src_name); high_nmcpy(user_data, iucv->dst_name); diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 1e42e13ad24e36481e4039bf09f62d2229bb483a..d3e9efab7f4bd2a2251ab98d098d57d398fdb2a4 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -86,13 +86,15 @@ struct device *iucv_alloc_device(const struct attribute_group **attrs, { struct device *dev; va_list vargs; + char buf[20]; int rc; dev = kzalloc(sizeof(*dev), GFP_KERNEL); if (!dev) goto out_error; va_start(vargs, fmt); - rc = dev_set_name(dev, fmt, vargs); + vsnprintf(buf, sizeof(buf), fmt, vargs); + rc = dev_set_name(dev, "%s", buf); va_end(vargs); if (rc) goto out_error; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 2f191e50d4fc94e64ab21018380d2af2aee05e83..d4118c796290e5d2ab2d12ad226d89638c8aeaf6 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -755,6 +755,7 @@ static int kcm_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) !(msg->msg_flags & MSG_MORE) : !!(msg->msg_flags & MSG_EOR); int err = -EPIPE; + mutex_lock(&kcm->tx_mutex); lock_sock(sk); /* Per tcp_sendmsg this should be in poll */ @@ -926,6 +927,7 @@ partial_message: KCM_STATS_ADD(kcm->stats.tx_bytes, copied); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return copied; out_error: @@ -951,6 +953,7 @@ out_error: sk->sk_write_space(sk); release_sock(sk); + mutex_unlock(&kcm->tx_mutex); return err; } @@ -1204,6 +1207,7 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->lock); INIT_WORK(&kcm->tx_work, kcm_tx_work); + mutex_init(&kcm->tx_mutex); spin_lock_bh(&mux->rx_lock); kcm_rcv_ready(kcm); diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index c80ab3f26084fb77b37f321d3bade521be66848b..2e86f520f799410aa946e0389a84fb58714941dd 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -86,6 +86,11 @@ /* Default trace flags */ #define L2TP_DEFAULT_DEBUG_FLAGS 0 +#define L2TP_DEPTH_NESTING 2 +#if L2TP_DEPTH_NESTING == SINGLE_DEPTH_NESTING +#error "L2TP requires its own lockdep subclass" +#endif + /* Private data stored for received packets in the skb. */ struct l2tp_skb_cb { @@ -1124,7 +1129,13 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); nf_reset_ct(skb); - bh_lock_sock_nested(sk); + /* L2TP uses its own lockdep subclass to avoid lockdep splats caused by + * nested socket calls on the same lockdep socket class. This can + * happen when data from a user socket is routed over l2tp, which uses + * another userspace socket. + */ + spin_lock_nested(&sk->sk_lock.slock, L2TP_DEPTH_NESTING); + if (sock_owned_by_user(sk)) { kfree_skb(skb); ret = NET_XMIT_DROP; @@ -1176,7 +1187,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns ret = l2tp_xmit_queue(tunnel, skb, &inet->cork.fl); out_unlock: - bh_unlock_sock(sk); + spin_unlock(&sk->sk_lock.slock); return ret; } diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 85cb71de370f297fa948c590729e0c7764fb25b3..b02b84ce2130761354829ee17b5ea9d85f8d1f81 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -114,7 +114,7 @@ static int ieee80211_set_mon_options(struct ieee80211_sub_if_data *sdata, /* apply all changes now - no failures allowed */ - if (monitor_sdata) + if (monitor_sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) ieee80211_set_mu_mimo_follow(monitor_sdata, params); if (params->flags) { @@ -3053,6 +3053,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { + if (!ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) + return -EOPNOTSUPP; + sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (!sdata) @@ -3115,7 +3118,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, if (has_monitor) { sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { sdata->deflink.user_power_level = local->user_power_level; if (txp_type != sdata->vif.bss_conf.txpower_type) update_txp_type = true; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 72a9ba8bc5fd9764068e00d42755f3ba922418de..edba4a31844fb6251f35e249ea59ea74e951de2f 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1768,7 +1768,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, break; } sdata = rcu_dereference(local->monitor_sdata); - if (sdata) { + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; @@ -3957,7 +3957,8 @@ begin: break; } tx.sdata = rcu_dereference(local->monitor_sdata); - if (tx.sdata) { + if (tx.sdata && + ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF)) { vif = &tx.sdata->vif; info->hw_queue = vif->hw_queue[skb_get_queue_mapping(skb)]; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index ced19ce7c51a33198dbec12362318040dc82a597..c7ad9bc5973a0e9076e3d8fc7b594d63d842feed 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -776,7 +776,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, sdata = rcu_dereference_check(local->monitor_sdata, lockdep_is_held(&local->iflist_mtx) || lockdep_is_held(&local->hw.wiphy->mtx)); - if (sdata && + if (sdata && ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && (iter_flags & IEEE80211_IFACE_ITER_RESUME_ALL || !active_only || sdata->flags & IEEE80211_SDATA_IN_DRIVER)) iterator(data, sdata->vif.addr, &sdata->vif); diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 77e5dd42225802fa4418448c6858c06cc636545e..8551dab1d1e69836c84f68509bc9dab43a96cc67 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -366,7 +366,7 @@ static void mctp_test_route_input_sk(struct kunit *test) skb2 = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb2); - KUNIT_EXPECT_EQ(test, skb->len, 1); + KUNIT_EXPECT_EQ(test, skb2->len, 1); skb_free_datagram(sock->sk, skb2); diff --git a/net/mptcp/diag.c b/net/mptcp/diag.c index 3ae46b545d2c2b7b7bf55ea9b101bcce798489f9..2d3efb405437d85c0bca70d7a92ca3a7363365e1 100644 --- a/net/mptcp/diag.c +++ b/net/mptcp/diag.c @@ -94,7 +94,7 @@ static size_t subflow_get_info_size(const struct sock *sk) nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ */ nla_total_size_64bit(8) + /* MPTCP_SUBFLOW_ATTR_MAP_SEQ */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_MAP_SFSEQ */ - nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ + nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_SSN_OFFSET */ nla_total_size(2) + /* MPTCP_SUBFLOW_ATTR_MAP_DATALEN */ nla_total_size(4) + /* MPTCP_SUBFLOW_ATTR_FLAGS */ nla_total_size(1) + /* MPTCP_SUBFLOW_ATTR_ID_REM */ diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index c30405e7683370d3e3f422ac9a7b0c625348b1ae..7884217f33eb2f75ea76086c7f7fbea7106b0c1a 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -19,7 +19,9 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPTCPRetrans", MPTCP_MIB_RETRANSSEGS), SNMP_MIB_ITEM("MPJoinNoTokenFound", MPTCP_MIB_JOINNOTOKEN), SNMP_MIB_ITEM("MPJoinSynRx", MPTCP_MIB_JOINSYNRX), + SNMP_MIB_ITEM("MPJoinSynBackupRx", MPTCP_MIB_JOINSYNBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckRx", MPTCP_MIB_JOINSYNACKRX), + SNMP_MIB_ITEM("MPJoinSynAckBackupRx", MPTCP_MIB_JOINSYNACKBACKUPRX), SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 2704afd0dfe45249b1789d1c31732a982eafee65..66aa67f49d032c75e2821ed69be24b41e30a956a 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -14,7 +14,9 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RETRANSSEGS, /* Segments retransmitted at the MPTCP-level */ MPTCP_MIB_JOINNOTOKEN, /* Received MP_JOIN but the token was not found */ MPTCP_MIB_JOINSYNRX, /* Received a SYN + MP_JOIN */ + MPTCP_MIB_JOINSYNBACKUPRX, /* Received a SYN + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKRX, /* Received a SYN/ACK + MP_JOIN */ + MPTCP_MIB_JOINSYNACKBACKUPRX, /* Received a SYN/ACK + MP_JOIN + backup flag */ MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 8e8dcfbc299382454cf715a917c0f876ad5c35d4..ac2f1a54cc43ae4de33812357f82aef6b9b4aae0 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -909,7 +909,7 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; - opts->backup = subflow_req->backup; + opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; @@ -958,7 +958,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && + (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 55406720c607119b0502fe6918bbbcf19a0a355e..3e6e0f5510bb134ba51a4aeb880be9e496120d7f 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -60,16 +60,6 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ return 0; } -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list) -{ - pr_debug("msk=%p, rm_list_nr=%d", msk, rm_list->nr); - - spin_lock_bh(&msk->pm.lock); - mptcp_pm_nl_rm_subflow_received(msk, rm_list); - spin_unlock_bh(&msk->pm.lock); - return 0; -} - /* path manager event handlers */ void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side) @@ -426,15 +416,24 @@ int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc) return mptcp_pm_nl_get_local_id(msk, &skc_local); } +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc) +{ + struct mptcp_addr_info skc_local; + + mptcp_local_address((struct sock_common *)skc, &skc_local); + + if (mptcp_pm_is_userspace(msk)) + return mptcp_userspace_pm_is_backup(msk, &skc_local); + + return mptcp_pm_nl_is_backup(msk, &skc_local); +} + int mptcp_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int id, u8 *flags, int *ifindex) { *flags = 0; *ifindex = 0; - if (!id) - return 0; - if (mptcp_pm_is_userspace(msk)) return mptcp_userspace_pm_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); return mptcp_pm_nl_get_flags_and_ifindex_by_id(msk, id, flags, ifindex); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index ea9e5817b9e9d3dea5901798f5fee98690a28e14..3e4ad801786f2b7cf5088c8f9ffcce536cd26e1f 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -143,11 +143,13 @@ static bool lookup_subflow_by_daddr(const struct list_head *list, return false; } -static struct mptcp_pm_addr_entry * +static bool select_local_address(const struct pm_nl_pernet *pernet, - const struct mptcp_sock *msk) + const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; msk_owned_by_me(msk); @@ -159,17 +161,21 @@ select_local_address(const struct pm_nl_pernet *pernet, if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } -static struct mptcp_pm_addr_entry * -select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) +static bool +select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk, + struct mptcp_pm_addr_entry *new_entry) { - struct mptcp_pm_addr_entry *entry, *ret = NULL; + struct mptcp_pm_addr_entry *entry; + bool found = false; rcu_read_lock(); /* do not keep any additional per socket state, just signal @@ -184,11 +190,13 @@ select_signal_address(struct pm_nl_pernet *pernet, const struct mptcp_sock *msk) if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) continue; - ret = entry; + *new_entry = *entry; + found = true; break; } rcu_read_unlock(); - return ret; + + return found; } unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk) @@ -348,7 +356,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk, add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr); if (add_entry) { - if (mptcp_pm_is_kernel(msk)) + if (WARN_ON_ONCE(mptcp_pm_is_kernel(msk))) return false; sk_reset_timer(sk, &add_entry->add_timer, @@ -471,7 +479,6 @@ static void __mptcp_pm_send_ack(struct mptcp_sock *msk, struct mptcp_subflow_con slow = lock_sock_fast(ssk); if (prio) { subflow->send_mp_prio = 1; - subflow->backup = backup; subflow->request_bkup = backup; } @@ -514,8 +521,9 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info) static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; - struct mptcp_pm_addr_entry *local; + struct mptcp_pm_addr_entry local; unsigned int add_addr_signal_max; + bool signal_and_subflow = false; unsigned int local_addr_max; struct pm_nl_pernet *pernet; unsigned int subflows_max; @@ -556,8 +564,6 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) /* check first for announce */ if (msk->pm.add_addr_signaled < add_addr_signal_max) { - local = select_signal_address(pernet, msk); - /* due to racing events on both ends we can reach here while * previous add address is still running: if we invoke now * mptcp_pm_announce_addr(), that will fail and the @@ -568,16 +574,25 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) if (msk->pm.addr_signal & BIT(MPTCP_ADD_ADDR_SIGNAL)) return; - if (local) { - if (mptcp_pm_alloc_anno_list(msk, &local->addr)) { - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - msk->pm.add_addr_signaled++; - mptcp_pm_announce_addr(msk, &local->addr, false); - mptcp_pm_nl_addr_send_ack(msk); - } - } + if (!select_signal_address(pernet, msk, &local)) + goto subflow; + + /* If the alloc fails, we are on memory pressure, not worth + * continuing, and trying to create subflows. + */ + if (!mptcp_pm_alloc_anno_list(msk, &local.addr)) + return; + + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled++; + mptcp_pm_announce_addr(msk, &local.addr, false); + mptcp_pm_nl_addr_send_ack(msk); + + if (local.flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) + signal_and_subflow = true; } +subflow: /* check if should create a new subflow */ while (msk->pm.local_addr_used < local_addr_max && msk->pm.subflows < subflows_max) { @@ -585,21 +600,22 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk) bool fullmesh; int i, nr; - local = select_local_address(pernet, msk); - if (!local) + if (signal_and_subflow) + signal_and_subflow = false; + else if (!select_local_address(pernet, msk, &local)) break; - fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH); + fullmesh = !!(local.flags & MPTCP_PM_ADDR_FLAG_FULLMESH); msk->pm.local_addr_used++; - __clear_bit(local->addr.id, msk->pm.id_avail_bitmap); - nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs); + __clear_bit(local.addr.id, msk->pm.id_avail_bitmap); + nr = fill_remote_addresses_vec(msk, &local.addr, fullmesh, addrs); if (nr == 0) continue; spin_unlock_bh(&msk->pm.lock); for (i = 0; i < nr; i++) - __mptcp_subflow_connect(sk, &local->addr, &addrs[i]); + __mptcp_subflow_connect(sk, &local.addr, &addrs[i]); spin_lock_bh(&msk->pm.lock); } mptcp_pm_nl_check_work_pending(msk); @@ -624,6 +640,7 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, { struct sock *sk = (struct sock *)msk; struct mptcp_pm_addr_entry *entry; + struct mptcp_addr_info mpc_addr; struct pm_nl_pernet *pernet; unsigned int subflows_max; int i = 0; @@ -631,6 +648,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, pernet = pm_nl_get_pernet_from_msk(msk); subflows_max = mptcp_pm_get_subflows_max(msk); + mptcp_local_address((struct sock_common *)msk, &mpc_addr); + rcu_read_lock(); list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH)) @@ -641,7 +660,13 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk, if (msk->pm.subflows < subflows_max) { msk->pm.subflows++; - addrs[i++] = entry->addr; + addrs[i] = entry->addr; + + /* Special case for ID0: set the correct ID */ + if (mptcp_addresses_equal(&entry->addr, &mpc_addr, entry->addr.port)) + addrs[i].id = 0; + + i++; } } rcu_read_unlock(); @@ -817,25 +842,27 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, mptcp_close_ssk(sk, ssk, subflow); spin_lock_bh(&msk->pm.lock); - removed = true; + removed |= subflow->request_join; if (rm_type == MPTCP_MIB_RMSUBFLOW) __MPTCP_INC_STATS(sock_net(sk), rm_type); } - if (rm_type == MPTCP_MIB_RMSUBFLOW) - __set_bit(rm_id ? rm_id : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap); - else if (rm_type == MPTCP_MIB_RMADDR) + + if (rm_type == MPTCP_MIB_RMADDR) __MPTCP_INC_STATS(sock_net(sk), rm_type); + if (!removed) continue; if (!mptcp_pm_is_kernel(msk)) continue; - if (rm_type == MPTCP_MIB_RMADDR) { - msk->pm.add_addr_accepted--; - WRITE_ONCE(msk->pm.accept_addr, true); - } else if (rm_type == MPTCP_MIB_RMSUBFLOW) { - msk->pm.local_addr_used--; + if (rm_type == MPTCP_MIB_RMADDR && rm_id && + !WARN_ON_ONCE(msk->pm.add_addr_accepted == 0)) { + /* Note: if the subflow has been closed before, this + * add_addr_accepted counter will not be decremented. + */ + if (--msk->pm.add_addr_accepted < mptcp_pm_get_add_addr_accept_max(msk)) + WRITE_ONCE(msk->pm.accept_addr, true); } } } @@ -845,8 +872,8 @@ static void mptcp_pm_nl_rm_addr_received(struct mptcp_sock *msk) mptcp_pm_nl_rm_addr_or_subflow(msk, &msk->pm.rm_list_rx, MPTCP_MIB_RMADDR); } -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list) +static void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, + const struct mptcp_rm_list *rm_list) { mptcp_pm_nl_rm_addr_or_subflow(msk, rm_list, MPTCP_MIB_RMSUBFLOW); } @@ -1102,6 +1129,24 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc return ret; } +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc) +{ + struct pm_nl_pernet *pernet = pm_nl_get_pernet_from_msk(msk); + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + rcu_read_lock(); + list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, entry->addr.port)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + rcu_read_unlock(); + + return backup; +} + #define MPTCP_PM_CMD_GRP_OFFSET 0 #define MPTCP_PM_EV_GRP_OFFSET 1 @@ -1311,8 +1356,8 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) if (ret < 0) return ret; - if (addr.addr.port && !(addr.flags & MPTCP_PM_ADDR_FLAG_SIGNAL)) { - GENL_SET_ERR_MSG(info, "flags must have signal when using port"); + if (addr.addr.port && !address_use_port(&addr)) { + GENL_SET_ERR_MSG(info, "flags must have signal and not subflow when using port"); return -EINVAL; } @@ -1363,6 +1408,10 @@ int mptcp_pm_nl_get_flags_and_ifindex_by_id(struct mptcp_sock *msk, unsigned int struct sock *sk = (struct sock *)msk; struct net *net = sock_net(sk); + /* No entries with ID 0 */ + if (id == 0) + return 0; + rcu_read_lock(); entry = __lookup_addr_by_id(pm_nl_get_pernet(net), id); if (entry) { @@ -1401,12 +1450,24 @@ static bool mptcp_pm_remove_anno_addr(struct mptcp_sock *msk, ret = remove_anno_list_by_saddr(msk, addr); if (ret || force) { spin_lock_bh(&msk->pm.lock); + if (ret) { + __set_bit(addr->id, msk->pm.id_avail_bitmap); + msk->pm.add_addr_signaled--; + } mptcp_pm_remove_addr(msk, &list); spin_unlock_bh(&msk->pm.lock); } return ret; } +static void __mark_subflow_endp_available(struct mptcp_sock *msk, u8 id) +{ + /* If it was marked as used, and not ID 0, decrement local_addr_used */ + if (!__test_and_set_bit(id ? : msk->mpc_endpoint_id, msk->pm.id_avail_bitmap) && + id && !WARN_ON_ONCE(msk->pm.local_addr_used == 0)) + msk->pm.local_addr_used--; +} + static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, const struct mptcp_pm_addr_entry *entry) { @@ -1435,8 +1496,19 @@ static int mptcp_nl_remove_subflow_and_signal_addr(struct net *net, remove_subflow = lookup_subflow_by_saddr(&msk->conn_list, addr); mptcp_pm_remove_anno_addr(msk, addr, remove_subflow && !(entry->flags & MPTCP_PM_ADDR_FLAG_IMPLICIT)); - if (remove_subflow) - mptcp_pm_remove_subflow(msk, &list); + + if (remove_subflow) { + spin_lock_bh(&msk->pm.lock); + mptcp_pm_nl_rm_subflow_received(msk, &list); + spin_unlock_bh(&msk->pm.lock); + } + + if (entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + spin_lock_bh(&msk->pm.lock); + __mark_subflow_endp_available(msk, list.ids[0]); + spin_unlock_bh(&msk->pm.lock); + } + release_sock(sk); next: @@ -1471,6 +1543,7 @@ static int mptcp_nl_remove_id_zero_address(struct net *net, spin_lock_bh(&msk->pm.lock); mptcp_pm_remove_addr(msk, &list); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, 0); spin_unlock_bh(&msk->pm.lock); release_sock(sk); @@ -1534,16 +1607,25 @@ void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list) { struct mptcp_rm_list alist = { .nr = 0 }; struct mptcp_pm_addr_entry *entry; + int anno_nr = 0; list_for_each_entry(entry, rm_list, list) { - if ((remove_anno_list_by_saddr(msk, &entry->addr) || - lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) && - alist.nr < MPTCP_RM_IDS_MAX) - alist.ids[alist.nr++] = entry->addr.id; + if (alist.nr >= MPTCP_RM_IDS_MAX) + break; + + /* only delete if either announced or matching a subflow */ + if (remove_anno_list_by_saddr(msk, &entry->addr)) + anno_nr++; + else if (!lookup_subflow_by_saddr(&msk->conn_list, + &entry->addr)) + continue; + + alist.ids[alist.nr++] = entry->addr.id; } if (alist.nr) { spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= anno_nr; mptcp_pm_remove_addr(msk, &alist); spin_unlock_bh(&msk->pm.lock); } @@ -1556,22 +1638,26 @@ static void mptcp_pm_remove_addrs_and_subflows(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry; list_for_each_entry(entry, rm_list, list) { - if (lookup_subflow_by_saddr(&msk->conn_list, &entry->addr) && - slist.nr < MPTCP_RM_IDS_MAX) + if (slist.nr < MPTCP_RM_IDS_MAX && + lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) slist.ids[slist.nr++] = entry->addr.id; - if (remove_anno_list_by_saddr(msk, &entry->addr) && - alist.nr < MPTCP_RM_IDS_MAX) + if (alist.nr < MPTCP_RM_IDS_MAX && + remove_anno_list_by_saddr(msk, &entry->addr)) alist.ids[alist.nr++] = entry->addr.id; } + spin_lock_bh(&msk->pm.lock); if (alist.nr) { - spin_lock_bh(&msk->pm.lock); + msk->pm.add_addr_signaled -= alist.nr; mptcp_pm_remove_addr(msk, &alist); - spin_unlock_bh(&msk->pm.lock); } if (slist.nr) - mptcp_pm_remove_subflow(msk, &slist); + mptcp_pm_nl_rm_subflow_received(msk, &slist); + /* Reset counters: maybe some subflows have been removed before */ + bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + msk->pm.local_addr_used = 0; + spin_unlock_bh(&msk->pm.lock); } static void mptcp_nl_remove_addrs_list(struct net *net, @@ -1859,6 +1945,7 @@ static void mptcp_pm_nl_fullmesh(struct mptcp_sock *msk, spin_lock_bh(&msk->pm.lock); mptcp_pm_nl_rm_subflow_received(msk, &list); + __mark_subflow_endp_available(msk, list.ids[0]); mptcp_pm_create_subflow_or_signal_addr(msk); spin_unlock_bh(&msk->pm.lock); } diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index f0a4590506c69a13123ecc041ce1724bc36d95e2..8eaa9fbe3e343be193c2b7fd0b73cba3a4227525 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -165,6 +165,24 @@ int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, return mptcp_userspace_pm_append_new_local_addr(msk, &new_entry, true); } +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, + struct mptcp_addr_info *skc) +{ + struct mptcp_pm_addr_entry *entry; + bool backup = false; + + spin_lock_bh(&msk->pm.lock); + list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) { + if (mptcp_addresses_equal(&entry->addr, skc, false)) { + backup = !!(entry->flags & MPTCP_PM_ADDR_FLAG_BACKUP); + break; + } + } + spin_unlock_bh(&msk->pm.lock); + + return backup; +} + int mptcp_pm_nl_announce_doit(struct sk_buff *skb, struct genl_info *info) { struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN]; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index a26c2c840fd9ca6e1bd90aa57e0a11c122534eca..0d536b183a6c559fc932dd5ececb1482e195ee68 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -350,8 +350,10 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, skb_orphan(skb); /* try to fetch required memory from subflow */ - if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) + if (!mptcp_rmem_schedule(sk, ssk, skb->truesize)) { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); goto drop; + } has_rxtstamp = TCP_SKB_CB(skb)->has_rxtstamp; @@ -844,10 +846,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) sk_rbuf = ssk_rbuf; /* over limit? can't append more skbs to msk, Also, no need to wake-up*/ - if (__mptcp_rmem(sk) > sk_rbuf) { - MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RCVPRUNED); + if (__mptcp_rmem(sk) > sk_rbuf) return; - } /* Wake-up the reader only for in-sequence data */ mptcp_data_lock(sk); @@ -1422,13 +1422,15 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } mptcp_for_each_subflow(msk, subflow) { + bool backup = subflow->backup || subflow->request_bkup; + trace_mptcp_subflow_get_send(subflow); ssk = mptcp_subflow_tcp_sock(subflow); if (!mptcp_subflow_active(subflow)) continue; tout = max(tout, mptcp_timeout_from_subflow(subflow)); - nr_active += !subflow->backup; + nr_active += !backup; pace = subflow->avg_pacing_rate; if (unlikely(!pace)) { /* init pacing rate from socket */ @@ -1439,9 +1441,9 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) } linger_time = div_u64((u64)READ_ONCE(ssk->sk_wmem_queued) << 32, pace); - if (linger_time < send_info[subflow->backup].linger_time) { - send_info[subflow->backup].ssk = ssk; - send_info[subflow->backup].linger_time = linger_time; + if (linger_time < send_info[backup].linger_time) { + send_info[backup].ssk = ssk; + send_info[backup].linger_time = linger_time; } } __mptcp_set_timeout(sk, tout); diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index b11a4e50d52b6f1950660654eee022d387b6dc4b..a1c1b0ff1ce1c8383a4af066ce3f28e6a6e36647 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -448,6 +448,7 @@ struct mptcp_subflow_request_sock { u16 mp_capable : 1, mp_join : 1, backup : 1, + request_bkup : 1, csum_reqd : 1, allow_join_id0 : 1; u8 local_id; @@ -1025,7 +1026,6 @@ int mptcp_pm_announce_addr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr, bool echo); int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); -int mptcp_pm_remove_subflow(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_list); void mptcp_pm_remove_addrs(struct mptcp_sock *msk, struct list_head *rm_list); void mptcp_free_local_addr_list(struct mptcp_sock *msk); @@ -1108,6 +1108,9 @@ bool mptcp_pm_rm_addr_signal(struct mptcp_sock *msk, unsigned int remaining, int mptcp_pm_get_local_id(struct mptcp_sock *msk, struct sock_common *skc); int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_userspace_pm_get_local_id(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_pm_is_backup(struct mptcp_sock *msk, struct sock_common *skc); +bool mptcp_pm_nl_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); +bool mptcp_userspace_pm_is_backup(struct mptcp_sock *msk, struct mptcp_addr_info *skc); int mptcp_pm_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); int mptcp_pm_nl_dump_addr(struct sk_buff *msg, struct netlink_callback *cb); @@ -1129,8 +1132,6 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo void __init mptcp_pm_nl_init(void); void mptcp_pm_nl_work(struct mptcp_sock *msk); -void mptcp_pm_nl_rm_subflow_received(struct mptcp_sock *msk, - const struct mptcp_rm_list *rm_list); unsigned int mptcp_pm_get_add_addr_signal_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_add_addr_accept_max(const struct mptcp_sock *msk); unsigned int mptcp_pm_get_subflows_max(const struct mptcp_sock *msk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 39e2cbdf38019cc99e31e54ac221dbc7ef825110..a21c712350c36eaad55b7d9148f5dbaad53d19a6 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -100,6 +100,7 @@ static struct mptcp_sock *subflow_token_join_request(struct request_sock *req) return NULL; } subflow_req->local_id = local_id; + subflow_req->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)req); return msk; } @@ -168,6 +169,9 @@ static int subflow_check_req(struct request_sock *req, return 0; } else if (opt_mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); + + if (mp_opt.backup) + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNBACKUPRX); } if (opt_mp_capable && listener->request_mptcp) { @@ -577,6 +581,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow->mp_join = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); + if (subflow->backup) + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKBACKUPRX); + if (subflow_use_different_dport(msk, sk)) { pr_debug("synack inet_dport=%d %d", ntohs(inet_sk(sk)->inet_dport), @@ -614,6 +621,8 @@ static int subflow_chk_local_id(struct sock *sk) return err; subflow_set_local_id(subflow, err); + subflow->request_bkup = mptcp_pm_is_backup(msk, (struct sock_common *)sk); + return 0; } @@ -1221,14 +1230,22 @@ static void mptcp_subflow_discard_data(struct sock *ssk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); bool fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; - u32 incr; + struct tcp_sock *tp = tcp_sk(ssk); + u32 offset, incr, avail_len; - incr = limit >= skb->len ? skb->len + fin : limit; + offset = tp->copied_seq - TCP_SKB_CB(skb)->seq; + if (WARN_ON_ONCE(offset > skb->len)) + goto out; - pr_debug("discarding=%d len=%d seq=%d", incr, skb->len, - subflow->map_subflow_seq); + avail_len = skb->len - offset; + incr = limit >= avail_len ? avail_len + fin : limit; + + pr_debug("discarding=%d len=%d offset=%d seq=%d", incr, skb->len, + offset, subflow->map_subflow_seq); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DUPDATA); tcp_sk(ssk)->copied_seq += incr; + +out: if (!before(tcp_sk(ssk)->copied_seq, TCP_SKB_CB(skb)->end_seq)) sk_eat_skb(ssk, skb); if (mptcp_subflow_get_map_offset(subflow) >= subflow->map_data_len) @@ -2005,6 +2022,7 @@ static void subflow_ulp_clone(const struct request_sock *req, new_ctx->fully_established = 1; new_ctx->remote_key_valid = 1; new_ctx->backup = subflow_req->backup; + new_ctx->request_bkup = subflow_req->request_bkup; WRITE_ONCE(new_ctx->remote_id, subflow_req->remote_id); new_ctx->token = subflow_req->token; new_ctx->thmac = subflow_req->thmac; diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c index 88787b45e30d6bffe2c6d6db2f556163e9094c3d..8b541a080342060ceceb588f489e26fe5aa8c9ff 100644 --- a/net/netfilter/nf_flow_table_inet.c +++ b/net/netfilter/nf_flow_table_inet.c @@ -17,6 +17,9 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return NF_ACCEPT; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); proto = veth->h_vlan_encapsulated_proto; break; diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c index c2c005234dcd385a1377f3ab34c27f82d9b18b0c..98edcaa37b38d46f8236dd5ef1dff569581b8777 100644 --- a/net/netfilter/nf_flow_table_ip.c +++ b/net/netfilter/nf_flow_table_ip.c @@ -281,6 +281,9 @@ static bool nf_flow_skb_encap_protocol(struct sk_buff *skb, __be16 proto, switch (skb->protocol) { case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return false; + veth = (struct vlan_ethhdr *)skb_mac_header(skb); if (veth->h_vlan_encapsulated_proto == proto) { *offset += VLAN_HLEN; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index ff1a4e36c2b5dcf9d12d0f65ff98277f56a17beb..e06bc36f49fe74e89c4074eb8b32409bc7035e8c 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -841,8 +841,8 @@ static int nf_flow_offload_tuple(struct nf_flowtable *flowtable, struct list_head *block_cb_list) { struct flow_cls_offload cls_flow = {}; + struct netlink_ext_ack extack = {}; struct flow_block_cb *block_cb; - struct netlink_ext_ack extack; __be16 proto = ETH_P_ALL; int err, i = 0; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 481ee78e77bcf96d2271b465b259554f83df33b0..0a2f79346958965833c7636f436f297c5afc7a53 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -8020,6 +8020,19 @@ cont: return skb->len; } +static int nf_tables_dumpreset_obj(struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct nftables_pernet *nft_net = nft_pernet(sock_net(skb->sk)); + int ret; + + mutex_lock(&nft_net->commit_mutex); + ret = nf_tables_dump_obj(skb, cb); + mutex_unlock(&nft_net->commit_mutex); + + return ret; +} + static int nf_tables_dump_obj_start(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8036,12 +8049,18 @@ static int nf_tables_dump_obj_start(struct netlink_callback *cb) if (nla[NFTA_OBJ_TYPE]) ctx->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - ctx->reset = true; - return 0; } +static int nf_tables_dumpreset_obj_start(struct netlink_callback *cb) +{ + struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; + + ctx->reset = true; + + return nf_tables_dump_obj_start(cb); +} + static int nf_tables_dump_obj_done(struct netlink_callback *cb) { struct nft_obj_dump_ctx *ctx = (void *)cb->ctx; @@ -8052,8 +8071,9 @@ static int nf_tables_dump_obj_done(struct netlink_callback *cb) } /* called with rcu_read_lock held */ -static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, - const struct nlattr * const nla[]) +static struct sk_buff * +nf_tables_getobj_single(u32 portid, const struct nfnl_info *info, + const struct nlattr * const nla[], bool reset) { struct netlink_ext_ack *extack = info->extack; u8 genmask = nft_genmask_cur(info->net); @@ -8062,72 +8082,109 @@ static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, struct net *net = info->net; struct nft_object *obj; struct sk_buff *skb2; - bool reset = false; u32 objtype; int err; - if (info->nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .start = nf_tables_dump_obj_start, - .dump = nf_tables_dump_obj, - .done = nf_tables_dump_obj_done, - .module = THIS_MODULE, - .data = (void *)nla, - }; - - return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); - } - if (!nla[NFTA_OBJ_NAME] || !nla[NFTA_OBJ_TYPE]) - return -EINVAL; + return ERR_PTR(-EINVAL); table = nft_table_lookup(net, nla[NFTA_OBJ_TABLE], family, genmask, 0); if (IS_ERR(table)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_TABLE]); - return PTR_ERR(table); + return ERR_CAST(table); } objtype = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); obj = nft_obj_lookup(net, table, nla[NFTA_OBJ_NAME], objtype, genmask); if (IS_ERR(obj)) { NL_SET_BAD_ATTR(extack, nla[NFTA_OBJ_NAME]); - return PTR_ERR(obj); + return ERR_CAST(obj); } skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb2) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETOBJ_RESET) - reset = true; + err = nf_tables_fill_obj_info(skb2, net, portid, + info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, + family, table, obj, reset); + if (err < 0) { + kfree_skb(skb2); + return ERR_PTR(err); + } - if (reset) { - const struct nftables_pernet *nft_net; - char *buf; + return skb2; +} - nft_net = nft_pernet(net); - buf = kasprintf(GFP_ATOMIC, "%s:%u", table->name, nft_net->base_seq); +static int nf_tables_getobj(struct sk_buff *skb, const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + u32 portid = NETLINK_CB(skb).portid; + struct sk_buff *skb2; - audit_log_nfcfg(buf, - family, - 1, - AUDIT_NFT_OP_OBJ_RESET, - GFP_ATOMIC); - kfree(buf); + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dump_obj_start, + .dump = nf_tables_dump_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); } - err = nf_tables_fill_obj_info(skb2, net, NETLINK_CB(skb).portid, - info->nlh->nlmsg_seq, NFT_MSG_NEWOBJ, 0, - family, table, obj, reset); - if (err < 0) - goto err_fill_obj_info; + skb2 = nf_tables_getobj_single(portid, info, nla, false); + if (IS_ERR(skb2)) + return PTR_ERR(skb2); - return nfnetlink_unicast(skb2, net, NETLINK_CB(skb).portid); + return nfnetlink_unicast(skb2, info->net, portid); +} -err_fill_obj_info: - kfree_skb(skb2); - return err; +static int nf_tables_getobj_reset(struct sk_buff *skb, + const struct nfnl_info *info, + const struct nlattr * const nla[]) +{ + struct nftables_pernet *nft_net = nft_pernet(info->net); + u32 portid = NETLINK_CB(skb).portid; + struct net *net = info->net; + struct sk_buff *skb2; + char *buf; + + if (info->nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .start = nf_tables_dumpreset_obj_start, + .dump = nf_tables_dumpreset_obj, + .done = nf_tables_dump_obj_done, + .module = THIS_MODULE, + .data = (void *)nla, + }; + + return nft_netlink_dump_start_rcu(info->sk, skb, info->nlh, &c); + } + + if (!try_module_get(THIS_MODULE)) + return -EINVAL; + rcu_read_unlock(); + mutex_lock(&nft_net->commit_mutex); + skb2 = nf_tables_getobj_single(portid, info, nla, true); + mutex_unlock(&nft_net->commit_mutex); + rcu_read_lock(); + module_put(THIS_MODULE); + + if (IS_ERR(skb2)) + return PTR_ERR(skb2); + + buf = kasprintf(GFP_ATOMIC, "%.*s:%u", + nla_len(nla[NFTA_OBJ_TABLE]), + (char *)nla_data(nla[NFTA_OBJ_TABLE]), + nft_net->base_seq); + audit_log_nfcfg(buf, info->nfmsg->nfgen_family, 1, + AUDIT_NFT_OP_OBJ_RESET, GFP_ATOMIC); + kfree(buf); + + return nfnetlink_unicast(skb2, net, portid); } static void nft_obj_destroy(const struct nft_ctx *ctx, struct nft_object *obj) @@ -9410,7 +9467,7 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = { .policy = nft_obj_policy, }, [NFT_MSG_GETOBJ_RESET] = { - .call = nf_tables_getobj, + .call = nf_tables_getobj_reset, .type = NFNL_CB_RCU, .attr_count = NFTA_OBJ_MAX, .policy = nft_obj_policy, diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 4abf660c7baff01a361edaa8392edf2fac48f340..932b3ddb34f13c44333b32abd534b12f1d12955d 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -427,8 +427,10 @@ replay_abort: nfnl_unlock(subsys_id); - if (nlh->nlmsg_flags & NLM_F_ACK) + if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); + } while (skb->len >= nlmsg_total_size(0)) { int msglen, type; @@ -577,6 +579,7 @@ done: ss->abort(net, oskb, NFNL_ABORT_NONE); netlink_ack(oskb, nlmsg_hdr(oskb), err, NULL); } else if (nlh->nlmsg_flags & NLM_F_ACK) { + memset(&extack, 0, sizeof(extack)); nfnl_err_add(&err_list, nlh, 0, &extack); } } else { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 55e28e1da66ec6e6e12a6ffcd5d058fc6d590520..e0716da256bf55ecb8fb275f8a36071c35f1e93f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -820,10 +820,41 @@ static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) { #if IS_ENABLED(CONFIG_NF_CONNTRACK) static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; - const struct nf_conn *ct = (void *)skb_nfct(entry->skb); + struct nf_conn *ct = (void *)skb_nfct(entry->skb); + unsigned long status; + unsigned int use; - if (ct && ((ct->status & flags) == IPS_DYING)) + if (!ct) + return false; + + status = READ_ONCE(ct->status); + if ((status & flags) == IPS_DYING) return true; + + if (status & IPS_CONFIRMED) + return false; + + /* in some cases skb_clone() can occur after initial conntrack + * pickup, but conntrack assumes exclusive skb->_nfct ownership for + * unconfirmed entries. + * + * This happens for br_netfilter and with ip multicast routing. + * We can't be solved with serialization here because one clone could + * have been queued for local delivery. + */ + use = refcount_read(&ct->ct_general.use); + if (likely(use == 1)) + return false; + + /* Can't decrement further? Exclusive ownership. */ + if (!refcount_dec_not_one(&ct->ct_general.use)) + return false; + + skb_set_nfct(entry->skb, 0); + /* No nf_ct_put(): we already decremented .use and it cannot + * drop down to 0. + */ + return true; #endif return false; } diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 291ed2026367ec4a5b2b893caa3d5abfcf2c5959..eab0dc66bee6bd7e4f29adb5690d2c3ffd585090 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -107,11 +107,16 @@ static void nft_counter_reset(struct nft_counter_percpu_priv *priv, struct nft_counter *total) { struct nft_counter *this_cpu; + seqcount_t *myseq; local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); + myseq = this_cpu_ptr(&nft_counter_seq); + + write_seqcount_begin(myseq); this_cpu->packets -= total->packets; this_cpu->bytes -= total->bytes; + write_seqcount_end(myseq); local_bh_enable(); } @@ -265,7 +270,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, struct nft_counter *this_cpu; seqcount_t *myseq; - preempt_disable(); + local_bh_disable(); this_cpu = this_cpu_ptr(priv->counter); myseq = this_cpu_ptr(&nft_counter_seq); @@ -273,7 +278,7 @@ static void nft_counter_offload_stats(struct nft_expr *expr, this_cpu->packets += stats->pkts; this_cpu->bytes += stats->bytes; write_seqcount_end(myseq); - preempt_enable(); + local_bh_enable(); } void nft_counter_init_seqcount(void) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 99d72543abd3aa853345fa55ffb1f99bf77da21e..78d9961fcd446da5e1be0c0d1b9e0936020517b0 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -2706,7 +2706,7 @@ static struct pernet_operations ovs_net_ops = { }; static const char * const ovs_drop_reasons[] = { -#define S(x) (#x), +#define S(x) [(x) & ~SKB_DROP_REASON_SUBSYS_MASK] = (#x), OVS_DROP_REASONS(S) #undef S }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 113b907da0f757af7be920cc9b3a1b1c769f5804..3ba8e7e739b58a96e66ca64d38bff758500df3e1 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -44,6 +44,8 @@ static DEFINE_MUTEX(zones_mutex); struct zones_ht_key { struct net *net; u16 zone; + /* Note : pad[] must be the last field. */ + u8 pad[]; }; struct tcf_ct_flow_table { @@ -60,7 +62,7 @@ struct tcf_ct_flow_table { static const struct rhashtable_params zones_params = { .head_offset = offsetof(struct tcf_ct_flow_table, node), .key_offset = offsetof(struct tcf_ct_flow_table, key), - .key_len = sizeof_field(struct tcf_ct_flow_table, key), + .key_len = offsetof(struct zones_ht_key, pad), .automatic_shrinking = true, }; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index edc72962ae63a71bd0fb8bc625ed9509391cd6ac..0f8d581438c392aaed3d3b0fb63f3b33e2c4d09e 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -446,12 +446,10 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct netem_sched_data *q = qdisc_priv(sch); /* We don't fill cb now as skb_unshare() may invalidate it */ struct netem_skb_cb *cb; - struct sk_buff *skb2; + struct sk_buff *skb2 = NULL; struct sk_buff *segs = NULL; unsigned int prev_len = qdisc_pkt_len(skb); int count = 1; - int rc = NET_XMIT_SUCCESS; - int rc_drop = NET_XMIT_DROP; /* Do not fool qdisc_drop_all() */ skb->prev = NULL; @@ -480,19 +478,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, skb_orphan_partial(skb); /* - * If we need to duplicate packet, then re-insert at top of the - * qdisc tree, since parent queuer expects that only one - * skb will be queued. + * If we need to duplicate packet, then clone it before + * original is modified. */ - if (count > 1 && (skb2 = skb_clone(skb, GFP_ATOMIC)) != NULL) { - struct Qdisc *rootq = qdisc_root_bh(sch); - u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ - - q->duplicate = 0; - rootq->enqueue(skb2, rootq, to_free); - q->duplicate = dupsave; - rc_drop = NET_XMIT_SUCCESS; - } + if (count > 1) + skb2 = skb_clone(skb, GFP_ATOMIC); /* * Randomized packet corruption. @@ -504,7 +494,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (skb_is_gso(skb)) { skb = netem_segment(skb, sch, to_free); if (!skb) - return rc_drop; + goto finish_segs; + segs = skb->next; skb_mark_not_on_list(skb); qdisc_skb_cb(skb)->pkt_len = skb->len; @@ -530,7 +521,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* re-link segs, so that qdisc_drop_all() frees them all */ skb->next = segs; qdisc_drop_all(skb, sch, to_free); - return rc_drop; + if (skb2) + __qdisc_drop(skb2, to_free); + return NET_XMIT_DROP; + } + + /* + * If doing duplication then re-insert at top of the + * qdisc tree, since parent queuer expects that only one + * skb will be queued. + */ + if (skb2) { + struct Qdisc *rootq = qdisc_root_bh(sch); + u32 dupsave = q->duplicate; /* prevent duplicating a dup... */ + + q->duplicate = 0; + rootq->enqueue(skb2, rootq, to_free); + q->duplicate = dupsave; + skb2 = NULL; } qdisc_qstats_backlog_inc(sch, skb); @@ -601,9 +609,12 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, } finish_segs: + if (skb2) + __qdisc_drop(skb2, to_free); + if (segs) { unsigned int len, last_len; - int nb; + int rc, nb; len = skb ? skb->len : 0; nb = skb ? 1 : 0; diff --git a/net/sctp/input.c b/net/sctp/input.c index 17fcaa9b0df9452bbfe7c3bb4b2d300e6ca6ce40..a8a254a5008e529a3baf2418f784290188933aee 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -735,15 +735,19 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) struct sock *sk = ep->base.sk; struct net *net = sock_net(sk); struct sctp_hashbucket *head; + int err = 0; ep->hashent = sctp_ep_hashfn(net, ep->base.bind_addr.port); head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (sk->sk_reuseport) { bool any = sctp_is_ep_boundall(sk); struct sctp_endpoint *ep2; struct list_head *list; - int cnt = 0, err = 1; + int cnt = 0; + + err = 1; list_for_each(list, &ep->base.bind_addr.address_list) cnt++; @@ -761,24 +765,24 @@ static int __sctp_hash_endpoint(struct sctp_endpoint *ep) if (!err) { err = reuseport_add_sock(sk, sk2, any); if (err) - return err; + goto out; break; } else if (err < 0) { - return err; + goto out; } } if (err) { err = reuseport_alloc(sk, any); if (err) - return err; + goto out; } } - write_lock(&head->lock); hlist_add_head(&ep->node, &head->chain); +out: write_unlock(&head->lock); - return 0; + return err; } /* Add an endpoint to the hash. Local BH-safe. */ @@ -803,10 +807,9 @@ static void __sctp_unhash_endpoint(struct sctp_endpoint *ep) head = &sctp_ep_hashtable[ep->hashent]; + write_lock(&head->lock); if (rcu_access_pointer(sk->sk_reuseport_cb)) reuseport_detach_sock(sk); - - write_lock(&head->lock); hlist_del_init(&ep->node); write_unlock(&head->lock); } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 73a875573e7ad5b7a95f7941e33f0d784a91d16d..8e3093938cd2268ab20979786d28fa79fe64853f 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -3319,10 +3319,8 @@ int smc_create_clcsk(struct net *net, struct sock *sk, int family) rc = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &smc->clcsock); - if (rc) { - sk_common_release(sk); + if (rc) return rc; - } /* smc_clcsock_release() does not wait smc->clcsock->sk's * destruction; its sk_state might not be TCP_CLOSE after @@ -3368,6 +3366,9 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, smc->clcsock = clcsock; else rc = smc_create_clcsk(net, sk, family); + + if (rc) + sk_common_release(sk); out: return rc; } diff --git a/net/smc/smc_stats.h b/net/smc/smc_stats.h index 9d32058db2b5d62ca122d1ead0299427575dda35..e19177ce409230212f0e71046038f2e1138862c7 100644 --- a/net/smc/smc_stats.h +++ b/net/smc/smc_stats.h @@ -19,7 +19,7 @@ #include "smc_clc.h" -#define SMC_MAX_FBACK_RSN_CNT 30 +#define SMC_MAX_FBACK_RSN_CNT 36 enum { SMC_BUF_8K, diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index e03f14024e471d3104b08ca99375ab6d871577a5..88a59cfa5583c064d02df1b3632cb6021a24390d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -161,7 +161,7 @@ param_get_pool_mode(char *buf, const struct kernel_param *kp) str[len] = '\n'; str[len + 1] = '\0'; - return sysfs_emit(buf, str); + return sysfs_emit(buf, "%s", str); } module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode, diff --git a/net/sunrpc/xprtrdma/ib_client.c b/net/sunrpc/xprtrdma/ib_client.c index a938c19c3490d8e86a67ddfe3550ff67e1aa5c40..8507cd4d892170dcaf166056e4013aa821c1fcee 100644 --- a/net/sunrpc/xprtrdma/ib_client.c +++ b/net/sunrpc/xprtrdma/ib_client.c @@ -62,10 +62,11 @@ int rpcrdma_rn_register(struct ib_device *device, if (!rd || test_bit(RPCRDMA_RD_F_REMOVING, &rd->rd_flags)) return -ENETUNREACH; - kref_get(&rd->rd_kref); if (xa_alloc(&rd->rd_xa, &rn->rn_index, rn, xa_limit_32b, GFP_KERNEL) < 0) return -ENOMEM; + kref_get(&rd->rd_kref); rn->rn_done = done; + trace_rpcrdma_client_register(device, rn); return 0; } @@ -91,6 +92,7 @@ void rpcrdma_rn_unregister(struct ib_device *device, if (!rd) return; + trace_rpcrdma_client_unregister(device, rn); xa_erase(&rd->rd_xa, rn->rn_index); kref_put(&rd->rd_kref, rpcrdma_rn_release); } @@ -111,7 +113,7 @@ static int rpcrdma_add_one(struct ib_device *device) return -ENOMEM; kref_init(&rd->rd_kref); - xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC1); + xa_init_flags(&rd->rd_xa, XA_FLAGS_ALLOC); rd->rd_device = device; init_completion(&rd->rd_done); ib_set_client_data(device, &rpcrdma_ib_client, rd); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 4b040285aa78c061d636e1a0a75a59fbe931333b..0ff9b2dd86bac261fe3d787e0f94eaa3a0c456c9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1270,25 +1270,28 @@ out: return err; } +int __vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, + size_t len, int flags) +{ + struct sock *sk = sock->sk; + struct vsock_sock *vsk = vsock_sk(sk); + + return vsk->transport->dgram_dequeue(vsk, msg, len, flags); +} + int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { #ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; const struct proto *prot; -#endif - struct vsock_sock *vsk; - struct sock *sk; - sk = sock->sk; - vsk = vsock_sk(sk); - -#ifdef CONFIG_BPF_SYSCALL prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) return prot->recvmsg(sk, msg, len, flags, NULL); #endif - return vsk->transport->dgram_dequeue(vsk, msg, len, flags); + return __vsock_dgram_recvmsg(sock, msg, len, flags); } EXPORT_SYMBOL_GPL(vsock_dgram_recvmsg); @@ -2174,15 +2177,12 @@ out: } int -vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +__vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) { struct sock *sk; struct vsock_sock *vsk; const struct vsock_transport *transport; -#ifdef CONFIG_BPF_SYSCALL - const struct proto *prot; -#endif int err; sk = sock->sk; @@ -2233,14 +2233,6 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, goto out; } -#ifdef CONFIG_BPF_SYSCALL - prot = READ_ONCE(sk->sk_prot); - if (prot != &vsock_proto) { - release_sock(sk); - return prot->recvmsg(sk, msg, len, flags, NULL); - } -#endif - if (sk->sk_type == SOCK_STREAM) err = __vsock_stream_recvmsg(sk, msg, len, flags); else @@ -2250,6 +2242,22 @@ out: release_sock(sk); return err; } + +int +vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ +#ifdef CONFIG_BPF_SYSCALL + struct sock *sk = sock->sk; + const struct proto *prot; + + prot = READ_ONCE(sk->sk_prot); + if (prot != &vsock_proto) + return prot->recvmsg(sk, msg, len, flags, NULL); +#endif + + return __vsock_connectible_recvmsg(sock, msg, len, flags); +} EXPORT_SYMBOL_GPL(vsock_connectible_recvmsg); static int vsock_set_rcvlowat(struct sock *sk, int val) diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index a3c97546ab84a620263ed17fafaf94910d416f07..c42c5cc18f324108e044772e957c8d42c92ead8c 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -64,9 +64,9 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int int err; if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) - err = vsock_connectible_recvmsg(sock, msg, len, flags); + err = __vsock_connectible_recvmsg(sock, msg, len, flags); else if (sk->sk_type == SOCK_DGRAM) - err = vsock_dgram_recvmsg(sock, msg, len, flags); + err = __vsock_dgram_recvmsg(sock, msg, len, flags); else err = -EPROTOTYPE; diff --git a/net/wireless/scan.c b/net/wireless/scan.c index d99319d82205496636ca11a72837afbca5074fa0..64eeed82d43d5c90423a398464122eb41784e168 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -3178,8 +3178,7 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, struct ieee80211_mgmt *mgmt, size_t len, gfp_t gfp) { - size_t min_hdr_len = offsetof(struct ieee80211_mgmt, - u.probe_resp.variable); + size_t min_hdr_len; struct ieee80211_ext *ext = NULL; enum cfg80211_bss_frame_type ftype; u16 beacon_interval; @@ -3202,10 +3201,16 @@ cfg80211_inform_bss_frame_data(struct wiphy *wiphy, if (ieee80211_is_s1g_beacon(mgmt->frame_control)) { ext = (void *) mgmt; - min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_beacon); if (ieee80211_is_s1g_short_beacon(mgmt->frame_control)) min_hdr_len = offsetof(struct ieee80211_ext, u.s1g_short_beacon.variable); + else + min_hdr_len = offsetof(struct ieee80211_ext, + u.s1g_beacon.variable); + } else { + /* same for beacons */ + min_hdr_len = offsetof(struct ieee80211_mgmt, + u.probe_resp.variable); } if (WARN_ON(len < min_hdr_len)) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index e419aa8c4a5ac64ea28476e99c08c0a982e9914c..d9d7bf8bb5c1acde38f1263da3a371d4788c8641 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1045,6 +1045,7 @@ void cfg80211_connect_done(struct net_device *dev, cfg80211_hold_bss( bss_from_pub(params->links[link].bss)); ev->cr.links[link].bss = params->links[link].bss; + ev->cr.links[link].status = params->links[link].status; if (params->links[link].addr) { ev->cr.links[link].addr = next; diff --git a/rust/Makefile b/rust/Makefile index 1f10f92737f2c066b62729c098ffdff8af4fa087..8de3ebba95512c320448299b6f7a38f60dd60355 100644 --- a/rust/Makefile +++ b/rust/Makefile @@ -227,7 +227,7 @@ bindgen_skip_c_flags := -mno-fp-ret-in-387 -mpreferred-stack-boundary=% \ -fno-reorder-blocks -fno-allow-store-data-races -fasan-shadow-offset=% \ -fzero-call-used-regs=% -fno-stack-clash-protection \ -fno-inline-functions-called-once -fsanitize=bounds-strict \ - -fstrict-flex-arrays=% \ + -fstrict-flex-arrays=% -fmin-function-alignment=% \ --param=% --param asan-% # Derived from `scripts/Makefile.clang`. @@ -350,12 +350,12 @@ rust-analyzer: $(Q)$(srctree)/scripts/generate_rust_analyzer.py \ --cfgs='core=$(core-cfgs)' --cfgs='alloc=$(alloc-cfgs)' \ $(realpath $(srctree)) $(realpath $(objtree)) \ - $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ + $(rustc_sysroot) $(RUST_LIB_SRC) $(KBUILD_EXTMOD) > \ $(if $(KBUILD_EXTMOD),$(extmod_prefix),$(objtree))/rust-project.json redirect-intrinsics = \ - __addsf3 __eqsf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __unordsf2 \ - __adddf3 __ledf2 __ltdf2 __muldf3 __unorddf2 \ + __addsf3 __eqsf2 __extendsfdf2 __gesf2 __lesf2 __ltsf2 __mulsf3 __nesf2 __truncdfsf2 __unordsf2 \ + __adddf3 __eqdf2 __ledf2 __ltdf2 __muldf3 __unorddf2 \ __muloti4 __multi3 \ __udivmodti4 __udivti3 __umodti3 diff --git a/rust/compiler_builtins.rs b/rust/compiler_builtins.rs index bba2922c6ef77fe4326d84bbe20afc80dd4b92d5..f14b8d7caf89964198313deace1bcb06fea82964 100644 --- a/rust/compiler_builtins.rs +++ b/rust/compiler_builtins.rs @@ -40,16 +40,19 @@ macro_rules! define_panicking_intrinsics( define_panicking_intrinsics!("`f32` should not be used", { __addsf3, __eqsf2, + __extendsfdf2, __gesf2, __lesf2, __ltsf2, __mulsf3, __nesf2, + __truncdfsf2, __unordsf2, }); define_panicking_intrinsics!("`f64` should not be used", { __adddf3, + __eqdf2, __ledf2, __ltdf2, __muldf3, diff --git a/rust/kernel/firmware.rs b/rust/kernel/firmware.rs index 2ba03af9f0369d74e51b97c38100e8e1d7e0fd66..dee5b4b18aec403f38746a0fbd595929b7fc3ad4 100644 --- a/rust/kernel/firmware.rs +++ b/rust/kernel/firmware.rs @@ -2,7 +2,7 @@ //! Firmware abstraction //! -//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h") +//! C header: [`include/linux/firmware.h`](srctree/include/linux/firmware.h) use crate::{bindings, device::Device, error::Error, error::Result, str::CStr}; use core::ptr::NonNull; diff --git a/rust/macros/lib.rs b/rust/macros/lib.rs index 159e75292970750e242e29cb9f5db5c327e83c15..5be0cb9db3ee490c1b01878f58b89ab758fcacac 100644 --- a/rust/macros/lib.rs +++ b/rust/macros/lib.rs @@ -94,7 +94,7 @@ use proc_macro::TokenStream; /// - `license`: ASCII string literal of the license of the kernel module (required). /// - `alias`: array of ASCII string literals of the alias names of the kernel module. /// - `firmware`: array of ASCII string literals of the firmware files of -/// the kernel module. +/// the kernel module. #[proc_macro] pub fn module(ts: TokenStream) -> TokenStream { module::module(ts) diff --git a/samples/trace_events/trace_custom_sched.c b/samples/trace_events/trace_custom_sched.c index b99d9ab7db858f8e309980bee271f6b40c6a06c7..dd409b704b35b65020a8a5ea9bdf10bb343c2273 100644 --- a/samples/trace_events/trace_custom_sched.c +++ b/samples/trace_events/trace_custom_sched.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) fmt #include -#include #include #include diff --git a/scripts/Makefile.build b/scripts/Makefile.build index efacca63c89767bbe1f66d4458903c71178b8903..a5ac8ed1936fe8a9a0b410657cc8b2b8848fd843 100644 --- a/scripts/Makefile.build +++ b/scripts/Makefile.build @@ -487,7 +487,7 @@ $(subdir-ym): need-modorder=$(if $(filter $@/modules.order, $(subdir-modorder)),1) \ $(filter $@/%, $(single-subdir-goals)) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index fe3668dc4954b2afb945e75768ac0846d50a0672..207325eaf1d1cb936a6619e75463f25640e08f49 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -400,26 +400,23 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE $(obj)/%.dtbo.S: $(obj)/%.dtbo FORCE $(call if_changed,wrap_S_dtb) -quiet_cmd_dtc = DTC $@ +quiet_dtb_check_tag = $(if $(dtb-check-enabled),[C], ) +cmd_dtb_check = $(if $(dtb-check-enabled),; $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) $@ || true) + +quiet_cmd_dtc = DTC $(quiet_dtb_check_tag) $@ cmd_dtc = $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \ $(DTC) -o $@ -b 0 \ $(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \ -d $(depfile).dtc.tmp $(dtc-tmp) ; \ - cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) - -DT_CHECK_CMD = $(DT_CHECKER) $(DT_CHECKER_FLAGS) -u $(srctree)/$(DT_BINDING_DIR) -p $(DT_TMP_SCHEMA) + cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile) \ + $(cmd_dtb_check) # NOTE: # Do not replace $(filter %.dtb %.dtbo, $^) with $(real-prereqs). When a single # DTB is turned into a multi-blob DTB, $^ will contain header file dependencies # recorded in the .*.cmd file. -ifneq ($(CHECK_DTBS),) -quiet_cmd_fdtoverlay = DTOVLCH $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_fdtoverlay = DTOVL $@ - cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) -endif +quiet_cmd_fdtoverlay = OVL $(quiet_dtb_check_tag) $@ + cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(filter %.dtb %.dtbo, $^) $(cmd_dtb_check) $(multi-dtb-y): FORCE $(call if_changed,fdtoverlay) @@ -430,16 +427,11 @@ DT_CHECKER ?= dt-validate DT_CHECKER_FLAGS ?= $(if $(DT_SCHEMA_FILES),-l $(DT_SCHEMA_FILES),-m) DT_BINDING_DIR := Documentation/devicetree/bindings DT_TMP_SCHEMA := $(objtree)/$(DT_BINDING_DIR)/processed-schema.json - -quiet_cmd_dtb = DTC_CHK $@ - cmd_dtb = $(cmd_dtc) ; $(DT_CHECK_CMD) $@ || true -else -quiet_cmd_dtb = $(quiet_cmd_dtc) - cmd_dtb = $(cmd_dtc) +dtb-check-enabled = $(if $(filter %.dtb, $@),y) endif $(obj)/%.dtb: $(obj)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE - $(call if_changed_dep,dtb) + $(call if_changed_dep,dtc) $(obj)/%.dtbo: $(src)/%.dtso $(DTC) FORCE $(call if_changed_dep,dtc) diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal index 1fa98b5e952b4f943a8605c7348d585b8ef5d9c4..306a6bb86e4dc80dc8eaec5836ffa2c20f4b45f6 100644 --- a/scripts/Makefile.modfinal +++ b/scripts/Makefile.modfinal @@ -62,7 +62,7 @@ endif targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb968440c6fecd63b8076e1a46a87d80aee..5ceecbed31eb771256962cf1e18898748db27dde 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -33,7 +33,7 @@ targets += vmlinux vmlinux: scripts/link-vmlinux.sh vmlinux.o $(KBUILD_LDS) FORCE +$(call if_changed_dep,link_vmlinux) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/Makefile.vmlinux_o b/scripts/Makefile.vmlinux_o index 6de297916ce680accc8eff02aa2957cf8c915f19..d64070b6b4bce068429616d320dfdf0237c470f9 100644 --- a/scripts/Makefile.vmlinux_o +++ b/scripts/Makefile.vmlinux_o @@ -87,7 +87,7 @@ targets += modules.builtin modules.builtin: modules.builtin.modinfo FORCE $(call if_changed,modules_builtin) -# Add FORCE to the prequisites of a target to force it to be always rebuilt. +# Add FORCE to the prerequisites of a target to force it to be always rebuilt. # --------------------------------------------------------------------------- PHONY += FORCE diff --git a/scripts/gcc-plugins/randomize_layout_plugin.c b/scripts/gcc-plugins/randomize_layout_plugin.c index 746ff2d272f25667d31dbe2299ce00e2f9ea000a..5694df3da2e95b0a606f7174acdd89adc9b1a9e0 100644 --- a/scripts/gcc-plugins/randomize_layout_plugin.c +++ b/scripts/gcc-plugins/randomize_layout_plugin.c @@ -19,10 +19,6 @@ #include "gcc-common.h" #include "randomize_layout_seed.h" -#if BUILDING_GCC_MAJOR < 4 || (BUILDING_GCC_MAJOR == 4 && BUILDING_GCC_MINOR < 7) -#error "The RANDSTRUCT plugin requires GCC 4.7 or newer." -#endif - #define ORIG_TYPE_NAME(node) \ (TYPE_NAME(TYPE_MAIN_VARIANT(node)) != NULL_TREE ? ((const unsigned char *)IDENTIFIER_POINTER(TYPE_NAME(TYPE_MAIN_VARIANT(node)))) : (const unsigned char *)"anonymous") diff --git a/scripts/generate_rust_analyzer.py b/scripts/generate_rust_analyzer.py index f270c7b0cf345d948db8d03f488dc3de45a7e599..d2bc63cde8c6a39e74c95abc7329190fe460207d 100755 --- a/scripts/generate_rust_analyzer.py +++ b/scripts/generate_rust_analyzer.py @@ -145,6 +145,7 @@ def main(): parser.add_argument('--cfgs', action='append', default=[]) parser.add_argument("srctree", type=pathlib.Path) parser.add_argument("objtree", type=pathlib.Path) + parser.add_argument("sysroot", type=pathlib.Path) parser.add_argument("sysroot_src", type=pathlib.Path) parser.add_argument("exttree", type=pathlib.Path, nargs="?") args = parser.parse_args() @@ -154,9 +155,12 @@ def main(): level=logging.INFO if args.verbose else logging.WARNING ) + # Making sure that the `sysroot` and `sysroot_src` belong to the same toolchain. + assert args.sysroot in args.sysroot_src.parents + rust_project = { "crates": generate_crates(args.srctree, args.objtree, args.sysroot_src, args.exttree, args.cfgs), - "sysroot_src": str(args.sysroot_src), + "sysroot": str(args.sysroot), } json.dump(rust_project, sys.stdout, sort_keys=True, indent=4) diff --git a/scripts/generate_rust_target.rs b/scripts/generate_rust_target.rs index 87f34925eb7b71b7858124627fb2fee596e17aa5..404edf7587e088ca057ca943ff74e397e77d4a6e 100644 --- a/scripts/generate_rust_target.rs +++ b/scripts/generate_rust_target.rs @@ -162,7 +162,7 @@ fn main() { "data-layout", "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } @@ -179,7 +179,7 @@ fn main() { "data-layout", "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128", ); - let mut features = "-3dnow,-3dnowa,-mmx,+soft-float".to_string(); + let mut features = "-mmx,+soft-float".to_string(); if cfg.has("MITIGATION_RETPOLINE") { features += ",+retpoline-external-thunk"; } diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c index 0ed873491bf555e64da7b92662459d28ad8e7382..123dab0572f80045d06c053a5d905bc15660b857 100644 --- a/scripts/kallsyms.c +++ b/scripts/kallsyms.c @@ -5,8 +5,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. * - * Usage: kallsyms [--all-symbols] [--absolute-percpu] - * [--lto-clang] in.map > out.S + * Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S * * Table compression uses all the unused char codes on the symbols and * maps these to the most used substrings (tokens). For instance, it might @@ -62,7 +61,6 @@ static struct sym_entry **table; static unsigned int table_size, table_cnt; static int all_symbols; static int absolute_percpu; -static int lto_clang; static int token_profit[0x10000]; @@ -73,8 +71,7 @@ static unsigned char best_table_len[256]; static void usage(void) { - fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] " - "[--lto-clang] in.map > out.S\n"); + fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] in.map > out.S\n"); exit(1); } @@ -344,25 +341,6 @@ static bool symbol_absolute(const struct sym_entry *s) return s->percpu_absolute; } -static void cleanup_symbol_name(char *s) -{ - char *p; - - /* - * ASCII[.] = 2e - * ASCII[0-9] = 30,39 - * ASCII[A-Z] = 41,5a - * ASCII[_] = 5f - * ASCII[a-z] = 61,7a - * - * As above, replacing the first '.' in ".llvm." with '\0' does not - * affect the main sorting, but it helps us with subsorting. - */ - p = strstr(s, ".llvm."); - if (p) - *p = '\0'; -} - static int compare_names(const void *a, const void *b) { int ret; @@ -526,10 +504,6 @@ static void write_src(void) output_address(relative_base); printf("\n"); - if (lto_clang) - for (i = 0; i < table_cnt; i++) - cleanup_symbol_name((char *)table[i]->sym); - sort_symbols_by_name(); output_label("kallsyms_seqs_of_names"); for (i = 0; i < table_cnt; i++) @@ -807,7 +781,6 @@ int main(int argc, char **argv) static const struct option long_options[] = { {"all-symbols", no_argument, &all_symbols, 1}, {"absolute-percpu", no_argument, &absolute_percpu, 1}, - {"lto-clang", no_argument, <o_clang, 1}, {}, }; diff --git a/scripts/kconfig/merge_config.sh b/scripts/kconfig/merge_config.sh index 902eb429b9dbd92e060925fdefd06bf2fdd45b4b..0b7952471c18f6882b8978f839f3170bb41fb01f 100755 --- a/scripts/kconfig/merge_config.sh +++ b/scripts/kconfig/merge_config.sh @@ -167,6 +167,8 @@ for ORIG_MERGE_FILE in $MERGE_LIST ; do sed -i "/$CFG[ =]/d" $MERGE_FILE fi done + # In case the previous file lacks a new line at the end + echo >> $TMP_FILE cat $MERGE_FILE >> $TMP_FILE done diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index f7b2503cdba95610e210de1da71fa5f56d03600a..070a319140e893f9dd050307257bb91dbccf32b0 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -156,10 +156,6 @@ kallsyms() kallsymopt="${kallsymopt} --absolute-percpu" fi - if is_enabled CONFIG_LTO_CLANG; then - kallsymopt="${kallsymopt} --lto-clang" - fi - info KSYMS "${2}.S" scripts/kallsyms ${kallsymopt} "${1}" > "${2}.S" @@ -219,7 +215,8 @@ kallsymso= strip_debug= if is_enabled CONFIG_KALLSYMS; then - kallsyms /dev/null .tmp_vmlinux0.kallsyms + truncate -s0 .tmp_vmlinux.kallsyms0.syms + kallsyms .tmp_vmlinux.kallsyms0.syms .tmp_vmlinux0.kallsyms fi if is_enabled CONFIG_KALLSYMS || is_enabled CONFIG_DEBUG_INFO_BTF; then diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index 591d85e8ca7e357ac80ab99eecfcba4cc2985423..845e24eb372e7f2aca193a4f424b85234b4fc9c2 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -53,6 +53,7 @@ 39 common umount2 sys_umount 40 common mount sys_mount 41 common pivot_root sys_pivot_root +42 common nfsservctl sys_ni_syscall 43 32 statfs64 sys_statfs64 compat_sys_statfs64 43 64 statfs sys_statfs 44 32 fstatfs64 sys_fstatfs64 compat_sys_fstatfs64 @@ -98,9 +99,9 @@ 77 common tee sys_tee 78 common readlinkat sys_readlinkat 79 stat64 fstatat64 sys_fstatat64 -79 newstat fstatat sys_newfstatat +79 64 newfstatat sys_newfstatat 80 stat64 fstat64 sys_fstat64 -80 newstat fstat sys_newfstat +80 64 fstat sys_newfstat 81 common sync sys_sync 82 common fsync sys_fsync 83 common fdatasync sys_fdatasync @@ -402,4 +403,3 @@ 460 common lsm_set_self_attr sys_lsm_set_self_attr 461 common lsm_list_modules sys_lsm_list_modules 462 common mseal sys_mseal -467 common uretprobe sys_uretprobe diff --git a/security/keys/trusted-keys/trusted_dcp.c b/security/keys/trusted-keys/trusted_dcp.c index b5f81a05be3676457fee411b753ac64588eaf4de..4edc5bbbcda3c99553896d6cc6c12226697fa727 100644 --- a/security/keys/trusted-keys/trusted_dcp.c +++ b/security/keys/trusted-keys/trusted_dcp.c @@ -186,20 +186,21 @@ out: return ret; } -static int decrypt_blob_key(u8 *key) +static int decrypt_blob_key(u8 *encrypted_key, u8 *plain_key) { - return do_dcp_crypto(key, key, false); + return do_dcp_crypto(encrypted_key, plain_key, false); } -static int encrypt_blob_key(u8 *key) +static int encrypt_blob_key(u8 *plain_key, u8 *encrypted_key) { - return do_dcp_crypto(key, key, true); + return do_dcp_crypto(plain_key, encrypted_key, true); } static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; blen = calc_blob_len(p->key_len); if (blen > MAX_BLOB_SIZE) @@ -207,30 +208,36 @@ static int trusted_dcp_seal(struct trusted_key_payload *p, char *datablob) b->fmt_version = DCP_BLOB_VERSION; get_random_bytes(b->nonce, AES_KEYSIZE_128); - get_random_bytes(b->blob_key, AES_KEYSIZE_128); + get_random_bytes(plain_blob_key, AES_KEYSIZE_128); - ret = do_aead_crypto(p->key, b->payload, p->key_len, b->blob_key, + ret = do_aead_crypto(p->key, b->payload, p->key_len, plain_blob_key, b->nonce, true); if (ret) { pr_err("Unable to encrypt blob payload: %i\n", ret); - return ret; + goto out; } - ret = encrypt_blob_key(b->blob_key); + ret = encrypt_blob_key(plain_blob_key, b->blob_key); if (ret) { pr_err("Unable to encrypt blob key: %i\n", ret); - return ret; + goto out; } - b->payload_len = get_unaligned_le32(&p->key_len); + put_unaligned_le32(p->key_len, &b->payload_len); p->blob_len = blen; - return 0; + ret = 0; + +out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + + return ret; } static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) { struct dcp_blob_fmt *b = (struct dcp_blob_fmt *)p->blob; int blen, ret; + u8 plain_blob_key[AES_KEYSIZE_128]; if (b->fmt_version != DCP_BLOB_VERSION) { pr_err("DCP blob has bad version: %i, expected %i\n", @@ -248,14 +255,14 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) goto out; } - ret = decrypt_blob_key(b->blob_key); + ret = decrypt_blob_key(b->blob_key, plain_blob_key); if (ret) { pr_err("Unable to decrypt blob key: %i\n", ret); goto out; } ret = do_aead_crypto(b->payload, p->key, p->key_len + DCP_BLOB_AUTHLEN, - b->blob_key, b->nonce, false); + plain_blob_key, b->nonce, false); if (ret) { pr_err("Unwrap of DCP payload failed: %i\n", ret); goto out; @@ -263,6 +270,8 @@ static int trusted_dcp_unseal(struct trusted_key_payload *p, char *datablob) ret = 0; out: + memzero_explicit(plain_blob_key, sizeof(plain_blob_key)); + return ret; } diff --git a/security/selinux/avc.c b/security/selinux/avc.c index 32eb67fb3e42c0f0e88b70ee37bf9a3afcd8e7d4..b49c44869dc462720bc9110f4a3aa895edb06537 100644 --- a/security/selinux/avc.c +++ b/security/selinux/avc.c @@ -330,12 +330,12 @@ static int avc_add_xperms_decision(struct avc_node *node, { struct avc_xperms_decision_node *dest_xpd; - node->ae.xp_node->xp.len++; dest_xpd = avc_xperms_decision_alloc(src->used); if (!dest_xpd) return -ENOMEM; avc_copy_xperms_decision(&dest_xpd->xpd, src); list_add(&dest_xpd->xpd_list, &node->ae.xp_node->xpd_head); + node->ae.xp_node->xp.len++; return 0; } @@ -907,7 +907,11 @@ static int avc_update_node(u32 event, u32 perms, u8 driver, u8 xperm, u32 ssid, node->ae.avd.auditdeny &= ~perms; break; case AVC_CALLBACK_ADD_XPERMS: - avc_add_xperms_decision(node, xpd); + rc = avc_add_xperms_decision(node, xpd); + if (rc) { + avc_node_kill(node); + goto out_unlock; + } break; } avc_node_replace(node, orig); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 55c78c318ccd78b8dcec24886c0ff5e919c67b85..bfa61e005aace203b4e66f799285f22c33e08902 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3852,7 +3852,17 @@ static int selinux_file_mprotect(struct vm_area_struct *vma, if (default_noexec && (prot & PROT_EXEC) && !(vma->vm_flags & VM_EXEC)) { int rc = 0; - if (vma_is_initial_heap(vma)) { + /* + * We don't use the vma_is_initial_heap() helper as it has + * a history of problems and is currently broken on systems + * where there is no heap, e.g. brk == start_brk. Before + * replacing the conditional below with vma_is_initial_heap(), + * or something similar, please ensure that the logic is the + * same as what we have below or you have tested every possible + * corner case you can think to test. + */ + if (vma->vm_start >= vma->vm_mm->start_brk && + vma->vm_end <= vma->vm_mm->brk) { rc = avc_has_perm(sid, sid, SECCLASS_PROCESS, PROCESS__EXECHEAP, NULL); } else if (!vma->vm_file && (vma_is_initial_stack(vma) || diff --git a/sound/core/seq/seq_ports.h b/sound/core/seq/seq_ports.h index b111382f697aa6c6ce7b9bc0d2ff68deda804855..9e36738c0dd049698d72abf31cbbb4b383f8b649 100644 --- a/sound/core/seq/seq_ports.h +++ b/sound/core/seq/seq_ports.h @@ -7,6 +7,7 @@ #define __SND_SEQ_PORTS_H #include +#include #include "seq_lock.h" /* list of 'exported' ports */ @@ -42,17 +43,6 @@ struct snd_seq_port_subs_info { int (*close)(void *private_data, struct snd_seq_port_subscribe *info); }; -/* context for converting from legacy control event to UMP packet */ -struct snd_seq_ump_midi2_bank { - bool rpn_set; - bool nrpn_set; - bool bank_set; - unsigned char cc_rpn_msb, cc_rpn_lsb; - unsigned char cc_nrpn_msb, cc_nrpn_lsb; - unsigned char cc_data_msb, cc_data_lsb; - unsigned char cc_bank_msb, cc_bank_lsb; -}; - struct snd_seq_client_port { struct snd_seq_addr addr; /* client/port number */ @@ -88,7 +78,7 @@ struct snd_seq_client_port { unsigned char ump_group; #if IS_ENABLED(CONFIG_SND_SEQ_UMP) - struct snd_seq_ump_midi2_bank midi2_bank[16]; /* per channel */ + struct ump_cvt_to_ump_bank midi2_bank[16]; /* per channel */ #endif }; diff --git a/sound/core/seq/seq_ump_convert.c b/sound/core/seq/seq_ump_convert.c index e90b27a135e6f14d4cdd5f3ce47c5dcf4788a42e..4dd540cbb1cbbcbf0df8bf9f563ca70ddfe2a4b8 100644 --- a/sound/core/seq/seq_ump_convert.c +++ b/sound/core/seq/seq_ump_convert.c @@ -368,7 +368,7 @@ static int cvt_ump_midi1_to_midi2(struct snd_seq_client *dest, struct snd_seq_ump_event ev_cvt; const union snd_ump_midi1_msg *midi1 = (const union snd_ump_midi1_msg *)event->ump; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)ev_cvt.ump; - struct snd_seq_ump_midi2_bank *cc; + struct ump_cvt_to_ump_bank *cc; ev_cvt = *event; memset(&ev_cvt.ump, 0, sizeof(ev_cvt.ump)); @@ -789,28 +789,45 @@ static int paf_ev_to_ump_midi2(const struct snd_seq_event *event, return 1; } +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) +{ + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + /* set up the MIDI2 RPN/NRPN packet data from the parsed info */ -static void fill_rpn(struct snd_seq_ump_midi2_bank *cc, - union snd_ump_midi2_msg *data, - unsigned char channel) +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *data, + unsigned char channel, + bool flush) { + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { data->rpn.status = UMP_MSG_STATUS_RPN; data->rpn.bank = cc->cc_rpn_msb; data->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { data->rpn.status = UMP_MSG_STATUS_NRPN; data->rpn.bank = cc->cc_nrpn_msb; data->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + data->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); data->rpn.channel = channel; - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert CC event to MIDI 2.0 UMP */ @@ -822,29 +839,39 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; unsigned char val = event->data.control.value & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; + int ret; /* process special CC's (bank/rpn/nrpn) */ switch (index) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_msb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->rpn_set = 1; cc->cc_rpn_lsb = val; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = val; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = val; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = val; - return 0; // skip + return fill_rpn(cc, data, channel, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = val; @@ -854,11 +881,9 @@ static int cc_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = val; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = val; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = status; @@ -887,7 +912,7 @@ static int pgm_ev_to_ump_midi2(const struct snd_seq_event *event, unsigned char status) { unsigned char channel = event->data.control.channel & 0x0f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; data->pg.status = status; data->pg.channel = channel; @@ -924,8 +949,9 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, { unsigned char channel = event->data.control.channel & 0x0f; unsigned char index = event->data.control.param & 0x7f; - struct snd_seq_ump_midi2_bank *cc = &dest_port->midi2_bank[channel]; + struct ump_cvt_to_ump_bank *cc = &dest_port->midi2_bank[channel]; unsigned char msb, lsb; + int ret; msb = (event->data.control.value >> 7) & 0x7f; lsb = event->data.control.value & 0x7f; @@ -939,28 +965,27 @@ static int ctrl14_ev_to_ump_midi2(const struct snd_seq_event *event, cc->cc_bank_lsb = lsb; return 0; // skip case UMP_CC_RPN_MSB: - cc->cc_rpn_msb = msb; - fallthrough; case UMP_CC_RPN_LSB: - cc->rpn_set = 1; + ret = fill_rpn(cc, data, channel, true); + cc->cc_rpn_msb = msb; cc->cc_rpn_lsb = lsb; - return 0; // skip + cc->rpn_set = 1; + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: - cc->cc_nrpn_msb = msb; - fallthrough; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, data, channel, true); + cc->cc_nrpn_msb = msb; cc->nrpn_set = 1; cc->cc_nrpn_lsb = lsb; - return 0; // skip + return ret; case UMP_CC_DATA: - cc->cc_data_msb = msb; - fallthrough; case UMP_CC_DATA_LSB: + cc->cc_data_msb_set = cc->cc_data_lsb_set = 1; + cc->cc_data_msb = msb; cc->cc_data_lsb = lsb; - if (!(cc->rpn_set || cc->nrpn_set)) - return 0; // skip - fill_rpn(cc, data, channel); - return 1; + return fill_rpn(cc, data, channel, false); } data->cc.status = UMP_MSG_STATUS_CC; @@ -1192,44 +1217,53 @@ static int cvt_sysex_to_ump(struct snd_seq_client *dest, { struct snd_seq_ump_event ev_cvt; unsigned char status; - u8 buf[6], *xbuf; + u8 buf[8], *xbuf; int offset = 0; int len, err; + bool finished = false; if (!snd_seq_ev_is_variable(event)) return 0; setup_ump_event(&ev_cvt, event); - for (;;) { + while (!finished) { len = snd_seq_expand_var_event_at(event, sizeof(buf), buf, offset); if (len <= 0) break; - if (WARN_ON(len > 6)) + if (WARN_ON(len > sizeof(buf))) break; - offset += len; + xbuf = buf; + status = UMP_SYSEX_STATUS_CONTINUE; + /* truncate the sysex start-marker */ if (*xbuf == UMP_MIDI1_MSG_SYSEX_START) { status = UMP_SYSEX_STATUS_START; - xbuf++; len--; - if (len > 0 && xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + offset++; + xbuf++; + } + + /* if the last of this packet or the 1st byte of the next packet + * is the end-marker, finish the transfer with this packet + */ + if (len > 0 && len < 8 && + xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + if (status == UMP_SYSEX_STATUS_START) status = UMP_SYSEX_STATUS_SINGLE; - len--; - } - } else { - if (xbuf[len - 1] == UMP_MIDI1_MSG_SYSEX_END) { + else status = UMP_SYSEX_STATUS_END; - len--; - } else { - status = UMP_SYSEX_STATUS_CONTINUE; - } + len--; + finished = true; } + + len = min(len, 6); fill_sysex7_ump(dest_port, ev_cvt.ump, status, xbuf, len); err = __snd_seq_deliver_single_event(dest, dest_port, (struct snd_seq_event *)&ev_cvt, atomic, hop); if (err < 0) return err; + offset += len; } return 0; } diff --git a/sound/core/timer.c b/sound/core/timer.c index d104adc75a8b0ebad1a43687b9006a39d7af4658..71a07c1662f5c4fc33dc11799225c710d03ef309 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -547,7 +547,7 @@ static int snd_timer_start1(struct snd_timer_instance *timeri, /* check the actual time for the start tick; * bail out as error if it's way too low (< 100us) */ - if (start) { + if (start && !(timer->hw.flags & SNDRV_TIMER_HW_SLAVE)) { if ((u64)snd_timer_hw_resolution(timer) * ticks < 100000) return -EINVAL; } diff --git a/sound/core/ump_convert.c b/sound/core/ump_convert.c index f67c44c83fde445258c59f34897b73a913c17cf1..0fe13d03165686de984f43d01a716fc31896f56d 100644 --- a/sound/core/ump_convert.c +++ b/sound/core/ump_convert.c @@ -287,25 +287,42 @@ static int cvt_legacy_system_to_ump(struct ump_cvt_to_ump *cvt, return 4; } -static void fill_rpn(struct ump_cvt_to_ump_bank *cc, - union snd_ump_midi2_msg *midi2) +static void reset_rpn(struct ump_cvt_to_ump_bank *cc) { + cc->rpn_set = 0; + cc->nrpn_set = 0; + cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; + cc->cc_data_msb = cc->cc_data_lsb = 0; + cc->cc_data_msb_set = cc->cc_data_lsb_set = 0; +} + +static int fill_rpn(struct ump_cvt_to_ump_bank *cc, + union snd_ump_midi2_msg *midi2, + bool flush) +{ + if (!(cc->cc_data_lsb_set || cc->cc_data_msb_set)) + return 0; // skip + /* when not flushing, wait for complete data set */ + if (!flush && (!cc->cc_data_lsb_set || !cc->cc_data_msb_set)) + return 0; // skip + if (cc->rpn_set) { midi2->rpn.status = UMP_MSG_STATUS_RPN; midi2->rpn.bank = cc->cc_rpn_msb; midi2->rpn.index = cc->cc_rpn_lsb; - cc->rpn_set = 0; - cc->cc_rpn_msb = cc->cc_rpn_lsb = 0; - } else { + } else if (cc->nrpn_set) { midi2->rpn.status = UMP_MSG_STATUS_NRPN; midi2->rpn.bank = cc->cc_nrpn_msb; midi2->rpn.index = cc->cc_nrpn_lsb; - cc->nrpn_set = 0; - cc->cc_nrpn_msb = cc->cc_nrpn_lsb = 0; + } else { + return 0; // skip } + midi2->rpn.data = upscale_14_to_32bit((cc->cc_data_msb << 7) | cc->cc_data_lsb); - cc->cc_data_msb = cc->cc_data_lsb = 0; + + reset_rpn(cc); + return 1; } /* convert to a MIDI 1.0 Channel Voice message */ @@ -318,6 +335,7 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, struct ump_cvt_to_ump_bank *cc; union snd_ump_midi2_msg *midi2 = (union snd_ump_midi2_msg *)data; unsigned char status, channel; + int ret; BUILD_BUG_ON(sizeof(union snd_ump_midi1_msg) != 4); BUILD_BUG_ON(sizeof(union snd_ump_midi2_msg) != 8); @@ -358,24 +376,33 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, case UMP_MSG_STATUS_CC: switch (buf[1]) { case UMP_CC_RPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_msb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_RPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->rpn_set = 1; cc->cc_rpn_lsb = buf[2]; - return 0; // skip + if (cc->cc_rpn_msb == 0x7f && cc->cc_rpn_lsb == 0x7f) + reset_rpn(cc); + return ret; case UMP_CC_NRPN_MSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_msb = buf[2]; - return 0; // skip + return ret; case UMP_CC_NRPN_LSB: + ret = fill_rpn(cc, midi2, true); cc->nrpn_set = 1; cc->cc_nrpn_lsb = buf[2]; - return 0; // skip + return ret; case UMP_CC_DATA: + cc->cc_data_msb_set = 1; cc->cc_data_msb = buf[2]; - return 0; // skip + return fill_rpn(cc, midi2, false); case UMP_CC_BANK_SELECT: cc->bank_set = 1; cc->cc_bank_msb = buf[2]; @@ -385,12 +412,9 @@ static int cvt_legacy_cmd_to_ump(struct ump_cvt_to_ump *cvt, cc->cc_bank_lsb = buf[2]; return 0; // skip case UMP_CC_DATA_LSB: + cc->cc_data_lsb_set = 1; cc->cc_data_lsb = buf[2]; - if (cc->rpn_set || cc->nrpn_set) - fill_rpn(cc, midi2); - else - return 0; // skip - break; + return fill_rpn(cc, midi2, false); default: midi2->cc.index = buf[1]; midi2->cc.data = upscale_7_to_32bit(buf[2]); diff --git a/sound/firewire/amdtp-stream.c b/sound/firewire/amdtp-stream.c index 1a163bbcabd79e671e7b50dd8d61b289ee9d3cf3..c827d7d8d8003564f48f091134ef2666dd06a852 100644 --- a/sound/firewire/amdtp-stream.c +++ b/sound/firewire/amdtp-stream.c @@ -77,6 +77,8 @@ // overrun. Actual device can skip more, then this module stops the packet streaming. #define IR_JUMBO_PAYLOAD_MAX_SKIP_CYCLES 5 +static void pcm_period_work(struct work_struct *work); + /** * amdtp_stream_init - initialize an AMDTP stream structure * @s: the AMDTP stream to initialize @@ -105,6 +107,7 @@ int amdtp_stream_init(struct amdtp_stream *s, struct fw_unit *unit, s->flags = flags; s->context = ERR_PTR(-1); mutex_init(&s->mutex); + INIT_WORK(&s->period_work, pcm_period_work); s->packet_index = 0; init_waitqueue_head(&s->ready_wait); @@ -347,6 +350,7 @@ EXPORT_SYMBOL(amdtp_stream_get_max_payload); */ void amdtp_stream_pcm_prepare(struct amdtp_stream *s) { + cancel_work_sync(&s->period_work); s->pcm_buffer_pointer = 0; s->pcm_period_pointer = 0; } @@ -611,19 +615,21 @@ static void update_pcm_pointers(struct amdtp_stream *s, // The program in user process should periodically check the status of intermediate // buffer associated to PCM substream to process PCM frames in the buffer, instead // of receiving notification of period elapsed by poll wait. - if (!pcm->runtime->no_period_wakeup) { - if (in_softirq()) { - // In software IRQ context for 1394 OHCI. - snd_pcm_period_elapsed(pcm); - } else { - // In process context of ALSA PCM application under acquired lock of - // PCM substream. - snd_pcm_period_elapsed_under_stream_lock(pcm); - } - } + if (!pcm->runtime->no_period_wakeup) + queue_work(system_highpri_wq, &s->period_work); } } +static void pcm_period_work(struct work_struct *work) +{ + struct amdtp_stream *s = container_of(work, struct amdtp_stream, + period_work); + struct snd_pcm_substream *pcm = READ_ONCE(s->pcm); + + if (pcm) + snd_pcm_period_elapsed(pcm); +} + static int queue_packet(struct amdtp_stream *s, struct fw_iso_packet *params, bool sched_irq) { @@ -1849,11 +1855,14 @@ unsigned long amdtp_domain_stream_pcm_pointer(struct amdtp_domain *d, { struct amdtp_stream *irq_target = d->irq_target; - // Process isochronous packets queued till recent isochronous cycle to handle PCM frames. if (irq_target && amdtp_stream_running(irq_target)) { - // In software IRQ context, the call causes dead-lock to disable the tasklet - // synchronously. - if (!in_softirq()) + // use wq to prevent AB/BA deadlock competition for + // substream lock: + // fw_iso_context_flush_completions() acquires + // lock by ohci_flush_iso_completions(), + // amdtp-stream process_rx_packets() attempts to + // acquire same lock by snd_pcm_elapsed() + if (current_work() != &s->period_work) fw_iso_context_flush_completions(irq_target->context); } @@ -1909,6 +1918,7 @@ static void amdtp_stream_stop(struct amdtp_stream *s) return; } + cancel_work_sync(&s->period_work); fw_iso_context_stop(s->context); fw_iso_context_destroy(s->context); s->context = ERR_PTR(-1); diff --git a/sound/firewire/amdtp-stream.h b/sound/firewire/amdtp-stream.h index a1ed2e80f91a7cc07fecf55288a20a240d0900da..775db3fc4959f57d006a247d5f5d6d74586a110c 100644 --- a/sound/firewire/amdtp-stream.h +++ b/sound/firewire/amdtp-stream.h @@ -191,6 +191,7 @@ struct amdtp_stream { /* For a PCM substream processing. */ struct snd_pcm_substream *pcm; + struct work_struct period_work; snd_pcm_uframes_t pcm_buffer_pointer; unsigned int pcm_period_pointer; unsigned int pcm_frame_multiplier; diff --git a/sound/pci/hda/cs35l41_hda.c b/sound/pci/hda/cs35l41_hda.c index 4b411ed8c3fe0e794a86c13f33351cb7be27b2b1..d68bf7591d90c19a601499d9c20612d8c1a12603 100644 --- a/sound/pci/hda/cs35l41_hda.c +++ b/sound/pci/hda/cs35l41_hda.c @@ -133,19 +133,8 @@ static const struct reg_sequence cs35l41_hda_mute[] = { { CS35L41_AMP_DIG_VOL_CTRL, 0x0000A678 }, // AMP_HPF_PCM_EN = 1, AMP_VOL_PCM Mute }; -static void cs35l41_add_controls(struct cs35l41_hda *cs35l41) -{ - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l41->amp_name; - info.fw_type = cs35l41->firmware_type; - info.card = cs35l41->codec->card; - - hda_cs_dsp_add_controls(&cs35l41->cs_dsp, &info); -} - static const struct cs_dsp_client_ops client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l41_request_tuning_param_file(struct cs35l41_hda *cs35l41, char *tuning_filename, @@ -603,8 +592,6 @@ static int cs35l41_init_dsp(struct cs35l41_hda *cs35l41) if (ret) goto err; - cs35l41_add_controls(cs35l41); - cs35l41_hda_apply_calibration(cs35l41); err: diff --git a/sound/pci/hda/cs35l56_hda.c b/sound/pci/hda/cs35l56_hda.c index 96d3f13c5abf9f24636294a8262b54b579e08269..a9dfd62637cf4cdbab2cca065751d459bdf34ab3 100644 --- a/sound/pci/hda/cs35l56_hda.c +++ b/sound/pci/hda/cs35l56_hda.c @@ -413,7 +413,7 @@ static void cs35l56_hda_remove_controls(struct cs35l56_hda *cs35l56) } static const struct cs_dsp_client_ops cs35l56_hda_client_ops = { - .control_remove = hda_cs_dsp_control_remove, + /* cs_dsp requires the client to provide this even if it is empty */ }; static int cs35l56_hda_request_firmware_file(struct cs35l56_hda *cs35l56, @@ -559,18 +559,6 @@ static void cs35l56_hda_release_firmware_files(const struct firmware *wmfw_firmw kfree(coeff_filename); } -static void cs35l56_hda_create_dsp_controls_work(struct work_struct *work) -{ - struct cs35l56_hda *cs35l56 = container_of(work, struct cs35l56_hda, control_work); - struct hda_cs_dsp_ctl_info info; - - info.device_name = cs35l56->amp_name; - info.fw_type = HDA_CS_DSP_FW_MISC; - info.card = cs35l56->codec->card; - - hda_cs_dsp_add_controls(&cs35l56->cs_dsp, &info); -} - static void cs35l56_hda_apply_calibration(struct cs35l56_hda *cs35l56) { int ret; @@ -595,26 +583,15 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) char *wmfw_filename = NULL; unsigned int preloaded_fw_ver; bool firmware_missing; - bool add_dsp_controls_required = false; int ret; - /* - * control_work must be flushed before proceeding, but we can't do that - * here as it would create a deadlock on controls_rwsem so it must be - * performed before queuing dsp_work. - */ - WARN_ON_ONCE(work_busy(&cs35l56->control_work)); - /* * Prepare for a new DSP power-up. If the DSP has had firmware * downloaded previously then it needs to be powered down so that it - * can be updated and if hadn't been patched before then the controls - * will need to be added once firmware download succeeds. + * can be updated. */ if (cs35l56->base.fw_patched) cs_dsp_power_down(&cs35l56->cs_dsp); - else - add_dsp_controls_required = true; cs35l56->base.fw_patched = false; @@ -698,15 +675,6 @@ static void cs35l56_hda_fw_load(struct cs35l56_hda *cs35l56) CS35L56_FIRMWARE_MISSING); cs35l56->base.fw_patched = true; - /* - * Adding controls is deferred to prevent a lock inversion - ALSA takes - * the controls_rwsem when adding a control, the get() / put() - * functions of a control are called holding controls_rwsem and those - * that depend on running firmware wait for dsp_work() to complete. - */ - if (add_dsp_controls_required) - queue_work(system_long_wq, &cs35l56->control_work); - ret = cs_dsp_run(&cs35l56->cs_dsp); if (ret) dev_dbg(cs35l56->base.dev, "%s: cs_dsp_run ret %d\n", __func__, ret); @@ -753,7 +721,6 @@ static int cs35l56_hda_bind(struct device *dev, struct device *master, void *mas strscpy(comp->name, dev_name(dev), sizeof(comp->name)); comp->playback_hook = cs35l56_hda_playback_hook; - flush_work(&cs35l56->control_work); queue_work(system_long_wq, &cs35l56->dsp_work); cs35l56_hda_create_controls(cs35l56); @@ -775,7 +742,6 @@ static void cs35l56_hda_unbind(struct device *dev, struct device *master, void * struct hda_component *comp; cancel_work_sync(&cs35l56->dsp_work); - cancel_work_sync(&cs35l56->control_work); cs35l56_hda_remove_controls(cs35l56); @@ -806,7 +772,6 @@ static int cs35l56_hda_system_suspend(struct device *dev) struct cs35l56_hda *cs35l56 = dev_get_drvdata(dev); cs35l56_hda_wait_dsp_ready(cs35l56); - flush_work(&cs35l56->control_work); if (cs35l56->playing) cs35l56_hda_pause(cs35l56); @@ -1026,7 +991,6 @@ int cs35l56_hda_common_probe(struct cs35l56_hda *cs35l56, int hid, int id) dev_set_drvdata(cs35l56->base.dev, cs35l56); INIT_WORK(&cs35l56->dsp_work, cs35l56_hda_dsp_work); - INIT_WORK(&cs35l56->control_work, cs35l56_hda_create_dsp_controls_work); ret = cs35l56_hda_read_acpi(cs35l56, hid, id); if (ret) diff --git a/sound/pci/hda/cs35l56_hda.h b/sound/pci/hda/cs35l56_hda.h index c40d159507c26d51568aabdd75b6c37facceebbe..38d94fb213a50a999136707a91893798b76357b8 100644 --- a/sound/pci/hda/cs35l56_hda.h +++ b/sound/pci/hda/cs35l56_hda.h @@ -23,7 +23,6 @@ struct cs35l56_hda { struct cs35l56_base base; struct hda_codec *codec; struct work_struct dsp_work; - struct work_struct control_work; int index; const char *system_name; diff --git a/sound/pci/hda/hda_controller.h b/sound/pci/hda/hda_controller.h index c2d0109866e62e5a93f915ad202ce71c0f84d6b9..68c883f202ca5bac763b0b07a81084067df4ec60 100644 --- a/sound/pci/hda/hda_controller.h +++ b/sound/pci/hda/hda_controller.h @@ -28,7 +28,7 @@ #else #define AZX_DCAPS_I915_COMPONENT 0 /* NOP */ #endif -/* 14 unused */ +#define AZX_DCAPS_AMD_ALLOC_FIX (1 << 14) /* AMD allocation workaround */ #define AZX_DCAPS_CTX_WORKAROUND (1 << 15) /* X-Fi workaround */ #define AZX_DCAPS_POSFIX_LPIB (1 << 16) /* Use LPIB as default */ #define AZX_DCAPS_AMD_WORKAROUND (1 << 17) /* AMD-specific workaround */ diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c index f64d9dc197a31af31a2cc80a5148830654fd9fb9..9cff87dfbecbb1fffa35a3d8819ac82c51571bb5 100644 --- a/sound/pci/hda/hda_generic.c +++ b/sound/pci/hda/hda_generic.c @@ -4955,6 +4955,69 @@ void snd_hda_gen_stream_pm(struct hda_codec *codec, hda_nid_t nid, bool on) } EXPORT_SYMBOL_GPL(snd_hda_gen_stream_pm); +/* forcibly mute the speaker output without caching; return true if updated */ +static bool force_mute_output_path(struct hda_codec *codec, hda_nid_t nid) +{ + if (!nid) + return false; + if (!nid_has_mute(codec, nid, HDA_OUTPUT)) + return false; /* no mute, skip */ + if (snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) & + snd_hda_codec_amp_read(codec, nid, 1, HDA_OUTPUT, 0) & + HDA_AMP_MUTE) + return false; /* both channels already muted, skip */ + + /* direct amp update without caching */ + snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE, + AC_AMP_SET_OUTPUT | AC_AMP_SET_LEFT | + AC_AMP_SET_RIGHT | HDA_AMP_MUTE); + return true; +} + +/** + * snd_hda_gen_shutup_speakers - Forcibly mute the speaker outputs + * @codec: the HDA codec + * + * Forcibly mute the speaker outputs, to be called at suspend or shutdown. + * + * The mute state done by this function isn't cached, hence the original state + * will be restored at resume. + * + * Return true if the mute state has been changed. + */ +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec) +{ + struct hda_gen_spec *spec = codec->spec; + const int *paths; + const struct nid_path *path; + int i, p, num_paths; + bool updated = false; + + /* if already powered off, do nothing */ + if (!snd_hdac_is_power_on(&codec->core)) + return false; + + if (spec->autocfg.line_out_type == AUTO_PIN_SPEAKER_OUT) { + paths = spec->out_paths; + num_paths = spec->autocfg.line_outs; + } else { + paths = spec->speaker_paths; + num_paths = spec->autocfg.speaker_outs; + } + + for (i = 0; i < num_paths; i++) { + path = snd_hda_get_path_from_idx(codec, paths[i]); + if (!path) + continue; + for (p = 0; p < path->depth; p++) + if (force_mute_output_path(codec, path->path[p])) + updated = true; + } + + return updated; +} +EXPORT_SYMBOL_GPL(snd_hda_gen_shutup_speakers); + /** * snd_hda_gen_parse_auto_config - Parse the given BIOS configuration and * set up the hda_gen_spec diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h index 8f5ecf740c4917051ee9c2f0a5d0666109f70dc7..08544601b4ce29865035734fa59901159dfbeff1 100644 --- a/sound/pci/hda/hda_generic.h +++ b/sound/pci/hda/hda_generic.h @@ -353,5 +353,6 @@ int snd_hda_gen_add_mute_led_cdev(struct hda_codec *codec, int snd_hda_gen_add_micmute_led_cdev(struct hda_codec *codec, int (*callback)(struct led_classdev *, enum led_brightness)); +bool snd_hda_gen_shutup_speakers(struct hda_codec *codec); #endif /* __SOUND_HDA_GENERIC_H */ diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b33602e64d174c8af1bf196ee5b0ce90668f9681..97d33a48ff17c80628cef26f1e690fdcf57dd0d8 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -40,6 +40,7 @@ #ifdef CONFIG_X86 /* for snoop control */ +#include #include #include #endif @@ -306,7 +307,7 @@ enum { /* quirks for ATI HDMI with snoop off */ #define AZX_DCAPS_PRESET_ATI_HDMI_NS \ - (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_SNOOP_OFF) + (AZX_DCAPS_PRESET_ATI_HDMI | AZX_DCAPS_AMD_ALLOC_FIX) /* quirks for AMD SB */ #define AZX_DCAPS_PRESET_AMD_SB \ @@ -1702,6 +1703,13 @@ static void azx_check_snoop_available(struct azx *chip) if (chip->driver_caps & AZX_DCAPS_SNOOP_OFF) snoop = false; +#ifdef CONFIG_X86 + /* check the presence of DMA ops (i.e. IOMMU), disable snoop conditionally */ + if ((chip->driver_caps & AZX_DCAPS_AMD_ALLOC_FIX) && + !get_dma_ops(chip->card->dev)) + snoop = false; +#endif + chip->snoop = snoop; if (!snoop) { dev_info(chip->card->dev, "Force to non-snoop mode\n"); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 17389a3801bd12976f3552e3e89cbbf4cd18ebed..f030669243f9a501e3a65a2d991f770fa1b8f4d4 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -21,12 +21,6 @@ #include "hda_jack.h" #include "hda_generic.h" -enum { - CX_HEADSET_NOPRESENT = 0, - CX_HEADSET_PARTPRESENT, - CX_HEADSET_ALLPRESENT, -}; - struct conexant_spec { struct hda_gen_spec gen; @@ -48,7 +42,6 @@ struct conexant_spec { unsigned int gpio_led; unsigned int gpio_mute_led_mask; unsigned int gpio_mic_led_mask; - unsigned int headset_present_flag; bool is_cx8070_sn6140; }; @@ -212,6 +205,8 @@ static void cx_auto_shutdown(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; + snd_hda_gen_shutup_speakers(codec); + /* Turn the problematic codec into D3 to avoid spurious noises from the internal speaker during (and after) reboot */ cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, false); @@ -250,48 +245,19 @@ static void cx_process_headset_plugin(struct hda_codec *codec) } } -static void cx_update_headset_mic_vref(struct hda_codec *codec, unsigned int res) +static void cx_update_headset_mic_vref(struct hda_codec *codec, struct hda_jack_callback *event) { - unsigned int phone_present, mic_persent, phone_tag, mic_tag; - struct conexant_spec *spec = codec->spec; + unsigned int mic_present; /* In cx8070 and sn6140, the node 16 can only be config to headphone or disabled, * the node 19 can only be config to microphone or disabled. * Check hp&mic tag to process headset pulgin&plugout. */ - phone_tag = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - mic_tag = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_UNSOLICITED_RESPONSE, 0x0); - if ((phone_tag & (res >> AC_UNSOL_RES_TAG_SHIFT)) || - (mic_tag & (res >> AC_UNSOL_RES_TAG_SHIFT))) { - phone_present = snd_hda_codec_read(codec, 0x16, 0, AC_VERB_GET_PIN_SENSE, 0x0); - if (!(phone_present & AC_PINSENSE_PRESENCE)) {/* headphone plugout */ - spec->headset_present_flag = CX_HEADSET_NOPRESENT; - snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); - return; - } - if (spec->headset_present_flag == CX_HEADSET_NOPRESENT) { - spec->headset_present_flag = CX_HEADSET_PARTPRESENT; - } else if (spec->headset_present_flag == CX_HEADSET_PARTPRESENT) { - mic_persent = snd_hda_codec_read(codec, 0x19, 0, - AC_VERB_GET_PIN_SENSE, 0x0); - /* headset is present */ - if ((phone_present & AC_PINSENSE_PRESENCE) && - (mic_persent & AC_PINSENSE_PRESENCE)) { - cx_process_headset_plugin(codec); - spec->headset_present_flag = CX_HEADSET_ALLPRESENT; - } - } - } -} - -static void cx_jack_unsol_event(struct hda_codec *codec, unsigned int res) -{ - struct conexant_spec *spec = codec->spec; - - if (spec->is_cx8070_sn6140) - cx_update_headset_mic_vref(codec, res); - - snd_hda_jack_unsol_event(codec, res); + mic_present = snd_hda_codec_read(codec, 0x19, 0, AC_VERB_GET_PIN_SENSE, 0x0); + if (!(mic_present & AC_PINSENSE_PRESENCE)) /* mic plugout */ + snd_hda_codec_write(codec, 0x19, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, 0x20); + else + cx_process_headset_plugin(codec); } static int cx_auto_suspend(struct hda_codec *codec) @@ -305,7 +271,7 @@ static const struct hda_codec_ops cx_auto_patch_ops = { .build_pcms = snd_hda_gen_build_pcms, .init = cx_auto_init, .free = cx_auto_free, - .unsol_event = cx_jack_unsol_event, + .unsol_event = snd_hda_jack_unsol_event, .suspend = cx_auto_suspend, .check_power_status = snd_hda_gen_check_power_status, }; @@ -1163,7 +1129,7 @@ static int patch_conexant_auto(struct hda_codec *codec) case 0x14f11f86: case 0x14f11f87: spec->is_cx8070_sn6140 = true; - spec->headset_present_flag = CX_HEADSET_NOPRESENT; + snd_hda_jack_detect_enable_callback(codec, 0x19, cx_update_headset_mic_vref); break; } diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c index 707d203ba6527654e3a510112a3658a084aa7c4c..78042ac2b71f217d671dff14f9af8b5f1b48f665 100644 --- a/sound/pci/hda/patch_hdmi.c +++ b/sound/pci/hda/patch_hdmi.c @@ -1989,6 +1989,8 @@ static int hdmi_add_cvt(struct hda_codec *codec, hda_nid_t cvt_nid) } static const struct snd_pci_quirk force_connect_list[] = { + SND_PCI_QUIRK(0x103c, 0x83e2, "HP EliteDesk 800 G4", 1), + SND_PCI_QUIRK(0x103c, 0x83ef, "HP MP9 G4 Retail System AMS", 1), SND_PCI_QUIRK(0x103c, 0x870f, "HP", 1), SND_PCI_QUIRK(0x103c, 0x871a, "HP", 1), SND_PCI_QUIRK(0x103c, 0x8711, "HP", 1), diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index ba0ce8750ca44223dc314982f4c8b9eb6f16bdef..d022a25635f9bf7cf751fb6f967005df98f08c47 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -11,15 +11,18 @@ */ #include +#include #include #include #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -583,7 +586,6 @@ static void alc_shutup_pins(struct hda_codec *codec) switch (codec->core.vendor_id) { case 0x10ec0236: case 0x10ec0256: - case 0x10ec0257: case 0x19e58326: case 0x10ec0283: case 0x10ec0285: @@ -6856,6 +6858,86 @@ static void comp_generic_fixup(struct hda_codec *cdc, int action, const char *bu } } +static void cs35lxx_autodet_fixup(struct hda_codec *cdc, + const struct hda_fixup *fix, + int action) +{ + struct device *dev = hda_codec_dev(cdc); + struct acpi_device *adev; + struct fwnode_handle *fwnode __free(fwnode_handle) = NULL; + const char *bus = NULL; + static const struct { + const char *hid; + const char *name; + } acpi_ids[] = {{ "CSC3554", "cs35l54-hda" }, + { "CSC3556", "cs35l56-hda" }, + { "CSC3557", "cs35l57-hda" }}; + char *match; + int i, count = 0, count_devindex = 0; + + switch (action) { + case HDA_FIXUP_ACT_PRE_PROBE: + for (i = 0; i < ARRAY_SIZE(acpi_ids); ++i) { + adev = acpi_dev_get_first_match_dev(acpi_ids[i].hid, NULL, -1); + if (adev) + break; + } + if (!adev) { + dev_err(dev, "Failed to find ACPI entry for a Cirrus Amp\n"); + return; + } + + count = i2c_acpi_client_count(adev); + if (count > 0) { + bus = "i2c"; + } else { + count = acpi_spi_count_resources(adev); + if (count > 0) + bus = "spi"; + } + + fwnode = fwnode_handle_get(acpi_fwnode_handle(adev)); + acpi_dev_put(adev); + + if (!bus) { + dev_err(dev, "Did not find any buses for %s\n", acpi_ids[i].hid); + return; + } + + if (!fwnode) { + dev_err(dev, "Could not get fwnode for %s\n", acpi_ids[i].hid); + return; + } + + /* + * When available the cirrus,dev-index property is an accurate + * count of the amps in a system and is used in preference to + * the count of bus devices that can contain additional address + * alias entries. + */ + count_devindex = fwnode_property_count_u32(fwnode, "cirrus,dev-index"); + if (count_devindex > 0) + count = count_devindex; + + match = devm_kasprintf(dev, GFP_KERNEL, "-%%s:00-%s.%%d", acpi_ids[i].name); + if (!match) + return; + dev_info(dev, "Found %d %s on %s (%s)\n", count, acpi_ids[i].hid, bus, match); + comp_generic_fixup(cdc, action, bus, acpi_ids[i].hid, match, count); + + break; + case HDA_FIXUP_ACT_FREE: + /* + * Pass the action on to comp_generic_fixup() so that + * hda_component_manager functions can be called in just once + * place. In this context the bus, hid, match_str or count + * values do not need to be calculated. + */ + comp_generic_fixup(cdc, action, NULL, NULL, NULL, 0); + break; + } +} + static void cs35l41_fixup_i2c_two(struct hda_codec *cdc, const struct hda_fixup *fix, int action) { comp_generic_fixup(cdc, action, "i2c", "CSC3551", "-%s:00-cs35l41-hda.%d", 2); @@ -7528,6 +7610,7 @@ enum { ALC256_FIXUP_CHROME_BOOK, ALC287_FIXUP_LENOVO_14ARP8_LEGION_IAH7, ALC287_FIXUP_LENOVO_SSID_17AA3820, + ALCXXX_FIXUP_CS35LXX, }; /* A special fixup for Lenovo C940 and Yoga Duet 7; @@ -9857,6 +9940,10 @@ static const struct hda_fixup alc269_fixups[] = { .type = HDA_FIXUP_FUNC, .v.func = alc287_fixup_lenovo_ssid_17aa3820, }, + [ALCXXX_FIXUP_CS35LXX] = { + .type = HDA_FIXUP_FUNC, + .v.func = cs35lxx_autodet_fixup, + }, }; static const struct snd_pci_quirk alc269_fixup_tbl[] = { @@ -9872,6 +9959,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS), SND_PCI_QUIRK(0x1025, 0x080d, "Acer Aspire V5-122P", ALC269_FIXUP_ASPIRE_HEADSET_MIC), SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF), + SND_PCI_QUIRK(0x1025, 0x100c, "Acer Aspire E5-574G", ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST), SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK), SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE), SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC), @@ -10270,6 +10358,17 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x103c, 0x8cdf, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8ce0, "HP SnowWhite", ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED), SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED), + SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d08, "HP EliteBook 1045 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d85, "HP EliteBook 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d86, "HP Elite x360 1040 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8c, "HP EliteBook 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8d, "HP Elite x360 830 13 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8e, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d8f, "HP EliteBook 840 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d90, "HP EliteBook 860 16 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALCXXX_FIXUP_CS35LXX), + SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALCXXX_FIXUP_CS35LXX), SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC), SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300), SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST), @@ -10677,6 +10776,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = { SND_PCI_QUIRK(0x8086, 0x3038, "Intel NUC 13", ALC295_FIXUP_CHROME_BOOK), SND_PCI_QUIRK(0xf111, 0x0001, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), SND_PCI_QUIRK(0xf111, 0x0006, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), + SND_PCI_QUIRK(0xf111, 0x0009, "Framework Laptop", ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE), #if 0 /* Below is a quirk table taken from the old code. diff --git a/sound/pci/hda/tas2781_hda_i2c.c b/sound/pci/hda/tas2781_hda_i2c.c index 49bd7097d8928aa11ffa53ee22748bd1a304d893..89d8235537cd3b01f96a210be2fcc84b07e891ae 100644 --- a/sound/pci/hda/tas2781_hda_i2c.c +++ b/sound/pci/hda/tas2781_hda_i2c.c @@ -2,10 +2,12 @@ // // TAS2781 HDA I2C driver // -// Copyright 2023 Texas Instruments, Inc. +// Copyright 2023 - 2024 Texas Instruments, Inc. // // Author: Shenghao Ding +// Current maintainer: Baojun Xu +#include #include #include #include @@ -519,20 +521,22 @@ static void tas2781_apply_calib(struct tasdevice_priv *tas_priv) static const unsigned char rgno_array[CALIB_MAX] = { 0x74, 0x0c, 0x14, 0x70, 0x7c, }; - unsigned char *data; + int offset = 0; int i, j, rc; + __be32 data; for (i = 0; i < tas_priv->ndev; i++) { - data = tas_priv->cali_data.data + - i * TASDEVICE_SPEAKER_CALIBRATION_SIZE; for (j = 0; j < CALIB_MAX; j++) { + data = cpu_to_be32( + *(uint32_t *)&tas_priv->cali_data.data[offset]); rc = tasdevice_dev_bulk_write(tas_priv, i, TASDEVICE_REG(0, page_array[j], rgno_array[j]), - &(data[4 * j]), 4); + (unsigned char *)&data, 4); if (rc < 0) dev_err(tas_priv->dev, "chn %d calib %d bulk_wr err = %d\n", i, j, rc); + offset += 4; } } } diff --git a/sound/soc/amd/yc/acp6x-mach.c b/sound/soc/amd/yc/acp6x-mach.c index 1769e07e83dcd01df1331fc29f9d176b03a5d4ed..0523c16305db1296b94a066bbea0cf9180e11a05 100644 --- a/sound/soc/amd/yc/acp6x-mach.c +++ b/sound/soc/amd/yc/acp6x-mach.c @@ -220,6 +220,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "21J6"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "LENOVO"), + DMI_MATCH(DMI_PRODUCT_NAME, "21M3"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -409,6 +416,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A43"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8A44"), + } + }, { .driver_data = &acp6x_card, .matches = { @@ -423,6 +437,13 @@ static const struct dmi_system_id yc_acp_quirk_table[] = { DMI_MATCH(DMI_BOARD_NAME, "8A3E"), } }, + { + .driver_data = &acp6x_card, + .matches = { + DMI_MATCH(DMI_BOARD_VENDOR, "HP"), + DMI_MATCH(DMI_BOARD_NAME, "8B27"), + } + }, { .driver_data = &acp6x_card, .matches = { diff --git a/sound/soc/codecs/cs-amp-lib.c b/sound/soc/codecs/cs-amp-lib.c index 287ac01a387357beffe2a8f76ab5f3ec15dc7b3d..605964af8afad4ac2763306094c2002c2f720b45 100644 --- a/sound/soc/codecs/cs-amp-lib.c +++ b/sound/soc/codecs/cs-amp-lib.c @@ -108,7 +108,7 @@ static efi_status_t cs_amp_get_efi_variable(efi_char16_t *name, KUNIT_STATIC_STUB_REDIRECT(cs_amp_get_efi_variable, name, guid, size, buf); - if (IS_ENABLED(CONFIG_EFI)) + if (efi_rt_services_supported(EFI_RT_SUPPORTED_GET_VARIABLE)) return efi.get_variable(name, guid, &attr, size, buf); return EFI_NOT_FOUND; diff --git a/sound/soc/codecs/cs35l45.c b/sound/soc/codecs/cs35l45.c index 2392c6effed857c32326ee032f776dcc577ed0fb..1e9d73bee3b4e2042e2a98a5d06d96c517699e52 100644 --- a/sound/soc/codecs/cs35l45.c +++ b/sound/soc/codecs/cs35l45.c @@ -176,17 +176,10 @@ static int cs35l45_activate_ctl(struct snd_soc_component *component, struct snd_kcontrol *kcontrol; struct snd_kcontrol_volatile *vd; unsigned int index_offset; - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - if (component->name_prefix) - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s %s", - component->name_prefix, ctl_name); - else - snprintf(name, SNDRV_CTL_ELEM_ID_NAME_MAXLEN, "%s", ctl_name); - - kcontrol = snd_soc_card_get_kcontrol_locked(component->card, name); + kcontrol = snd_soc_component_get_kcontrol_locked(component, ctl_name); if (!kcontrol) { - dev_err(component->dev, "Can't find kcontrol %s\n", name); + dev_err(component->dev, "Can't find kcontrol %s\n", ctl_name); return -EINVAL; } diff --git a/sound/soc/codecs/cs35l56-sdw.c b/sound/soc/codecs/cs35l56-sdw.c index fc03bb7ecae136b99fccc2efb332f05c4c1124fd..7c9a17fe2195ccc6011f4bc5ee09f594860b456e 100644 --- a/sound/soc/codecs/cs35l56-sdw.c +++ b/sound/soc/codecs/cs35l56-sdw.c @@ -23,6 +23,79 @@ /* Register addresses are offset when sent over SoundWire */ #define CS35L56_SDW_ADDR_OFFSET 0x8000 +/* Cirrus bus bridge registers */ +#define CS35L56_SDW_MEM_ACCESS_STATUS 0xd0 +#define CS35L56_SDW_MEM_READ_DATA 0xd8 + +#define CS35L56_SDW_LAST_LATE BIT(3) +#define CS35L56_SDW_CMD_IN_PROGRESS BIT(2) +#define CS35L56_SDW_RDATA_RDY BIT(0) + +#define CS35L56_LATE_READ_POLL_US 10 +#define CS35L56_LATE_READ_TIMEOUT_US 1000 + +static int cs35l56_sdw_poll_mem_status(struct sdw_slave *peripheral, + unsigned int mask, + unsigned int match) +{ + int ret, val; + + ret = read_poll_timeout(sdw_read_no_pm, val, + (val < 0) || ((val & mask) == match), + CS35L56_LATE_READ_POLL_US, CS35L56_LATE_READ_TIMEOUT_US, + false, peripheral, CS35L56_SDW_MEM_ACCESS_STATUS); + if (ret < 0) + return ret; + + if (val < 0) + return val; + + return 0; +} + +static int cs35l56_sdw_slow_read(struct sdw_slave *peripheral, unsigned int reg, + u8 *buf, size_t val_size) +{ + int ret, i; + + reg += CS35L56_SDW_ADDR_OFFSET; + + for (i = 0; i < val_size; i += sizeof(u32)) { + /* Poll for bus bridge idle */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_CMD_IN_PROGRESS, + 0); + if (ret < 0) { + dev_err(&peripheral->dev, "!CMD_IN_PROGRESS fail: %d\n", ret); + return ret; + } + + /* Reading LSByte triggers read of register to holding buffer */ + sdw_read_no_pm(peripheral, reg + i); + + /* Wait for data available */ + ret = cs35l56_sdw_poll_mem_status(peripheral, + CS35L56_SDW_RDATA_RDY, + CS35L56_SDW_RDATA_RDY); + if (ret < 0) { + dev_err(&peripheral->dev, "RDATA_RDY fail: %d\n", ret); + return ret; + } + + /* Read data from buffer */ + ret = sdw_nread_no_pm(peripheral, CS35L56_SDW_MEM_READ_DATA, + sizeof(u32), &buf[i]); + if (ret) { + dev_err(&peripheral->dev, "Late read @%#x failed: %d\n", reg + i, ret); + return ret; + } + + swab32s((u32 *)&buf[i]); + } + + return 0; +} + static int cs35l56_sdw_read_one(struct sdw_slave *peripheral, unsigned int reg, void *buf) { int ret; @@ -48,6 +121,10 @@ static int cs35l56_sdw_read(void *context, const void *reg_buf, int ret; reg = le32_to_cpu(*(const __le32 *)reg_buf); + + if (cs35l56_is_otp_register(reg)) + return cs35l56_sdw_slow_read(peripheral, reg, buf8, val_size); + reg += CS35L56_SDW_ADDR_OFFSET; if (val_size == 4) diff --git a/sound/soc/codecs/cs35l56-shared.c b/sound/soc/codecs/cs35l56-shared.c index e7e8d617da94e4931062f00db7bd386f14694ddf..bd74fef33d498341f8ed7604887927e6bb04e131 100644 --- a/sound/soc/codecs/cs35l56-shared.c +++ b/sound/soc/codecs/cs35l56-shared.c @@ -36,6 +36,7 @@ static const struct reg_sequence cs35l56_patch[] = { { CS35L56_SWIRE_DP3_CH2_INPUT, 0x00000019 }, { CS35L56_SWIRE_DP3_CH3_INPUT, 0x00000029 }, { CS35L56_SWIRE_DP3_CH4_INPUT, 0x00000028 }, + { CS35L56_IRQ1_MASK_18, 0x1f7df0ff }, /* These are not reset by a soft-reset, so patch to defaults. */ { CS35L56_MAIN_RENDER_USER_MUTE, 0x00000000 }, diff --git a/sound/soc/codecs/cs35l56.c b/sound/soc/codecs/cs35l56.c index 84c34f5b1a516d092e1ded727d3eb77c9f7f5b1e..757ade6373ede393496e9cbc0ee76b2c3d9d3b1f 100644 --- a/sound/soc/codecs/cs35l56.c +++ b/sound/soc/codecs/cs35l56.c @@ -1095,6 +1095,11 @@ int cs35l56_system_resume(struct device *dev) } EXPORT_SYMBOL_GPL(cs35l56_system_resume); +static int cs35l56_control_add_nop(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl) +{ + return 0; +} + static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) { struct wm_adsp *dsp; @@ -1117,6 +1122,12 @@ static int cs35l56_dsp_init(struct cs35l56_private *cs35l56) dsp->fw = 12; dsp->wmfw_optional = true; + /* + * None of the firmware controls need to be exported so add a no-op + * callback that suppresses creating an ALSA control. + */ + dsp->control_add = &cs35l56_control_add_nop; + dev_dbg(cs35l56->base.dev, "DSP system name: '%s'\n", dsp->system_name); ret = wm_halo_init(dsp); diff --git a/sound/soc/codecs/cs42l42.c b/sound/soc/codecs/cs42l42.c index 60d366e53526ff20a36eaea6322a8d8baf194446..6400ac875e6f6ca6ab5f1ac3cb2acc53f99b85fd 100644 --- a/sound/soc/codecs/cs42l42.c +++ b/sound/soc/codecs/cs42l42.c @@ -11,7 +11,6 @@ #include #include -#include #include #include #include diff --git a/sound/soc/codecs/cs42l43.c b/sound/soc/codecs/cs42l43.c index 92674314227c4623af36a6b1642e83c8a33572d6..5183b4586424326e3ee900ba875aa3c6515d4f85 100644 --- a/sound/soc/codecs/cs42l43.c +++ b/sound/soc/codecs/cs42l43.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -252,24 +253,20 @@ CS42L43_IRQ_COMPLETE(load_detect) static irqreturn_t cs42l43_mic_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - static const char * const controls[] = { - "Decimator 1 Switch", - "Decimator 2 Switch", - "Decimator 3 Switch", - "Decimator 4 Switch", - }; - int i, ret; + struct snd_soc_component *component = priv->component; + int i; dev_dbg(priv->dev, "Microphone shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - for (i = 0; i < ARRAY_SIZE(controls); i++) { - ret = snd_soc_component_notify_control(priv->component, - controls[i]); - if (ret) + for (i = 1; i < ARRAY_SIZE(priv->kctl); i++) { + if (!priv->kctl[i]) return IRQ_NONE; + + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[i]->id); } return IRQ_HANDLED; @@ -278,18 +275,19 @@ static irqreturn_t cs42l43_mic_shutter(int irq, void *data) static irqreturn_t cs42l43_spk_shutter(int irq, void *data) { struct cs42l43_codec *priv = data; - int ret; + struct snd_soc_component *component = priv->component; dev_dbg(priv->dev, "Speaker shutter changed\n"); - if (!priv->component) + if (!component) return IRQ_NONE; - ret = snd_soc_component_notify_control(priv->component, - "Speaker Digital Switch"); - if (ret) + if (!priv->kctl[0]) return IRQ_NONE; + snd_ctl_notify(component->card->snd_card, + SNDRV_CTL_EVENT_MASK_VALUE, &priv->kctl[0]->id); + return IRQ_HANDLED; } @@ -590,7 +588,46 @@ static int cs42l43_asp_set_tdm_slot(struct snd_soc_dai *dai, unsigned int tx_mas return 0; } +static int cs42l43_dai_probe(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + static const char * const controls[] = { + "Speaker Digital Switch", + "Decimator 1 Switch", + "Decimator 2 Switch", + "Decimator 3 Switch", + "Decimator 4 Switch", + }; + int i; + + static_assert(ARRAY_SIZE(controls) == ARRAY_SIZE(priv->kctl)); + + for (i = 0; i < ARRAY_SIZE(controls); i++) { + if (priv->kctl[i]) + continue; + + priv->kctl[i] = snd_soc_component_get_kcontrol(component, controls[i]); + } + + return 0; +} + +static int cs42l43_dai_remove(struct snd_soc_dai *dai) +{ + struct snd_soc_component *component = dai->component; + struct cs42l43_codec *priv = snd_soc_component_get_drvdata(component); + int i; + + for (i = 0; i < ARRAY_SIZE(priv->kctl); i++) + priv->kctl[i] = NULL; + + return 0; +} + static const struct snd_soc_dai_ops cs42l43_asp_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .hw_params = cs42l43_asp_hw_params, .set_fmt = cs42l43_asp_set_fmt, @@ -608,9 +645,11 @@ static int cs42l43_sdw_hw_params(struct snd_pcm_substream *substream, return ret; return cs42l43_set_sample_rate(substream, params, dai); -}; +} static const struct snd_soc_dai_ops cs42l43_sdw_ops = { + .probe = cs42l43_dai_probe, + .remove = cs42l43_dai_remove, .startup = cs42l43_startup, .set_stream = cs42l43_sdw_set_stream, .hw_params = cs42l43_sdw_hw_params, diff --git a/sound/soc/codecs/cs42l43.h b/sound/soc/codecs/cs42l43.h index 9924c13e1eb539a24efdcb2d5ed27c3f11cc9ed2..9c144e129535fa7f4b6ca3427df4418623f09481 100644 --- a/sound/soc/codecs/cs42l43.h +++ b/sound/soc/codecs/cs42l43.h @@ -100,6 +100,8 @@ struct cs42l43_codec { struct delayed_work hp_ilimit_clear_work; bool hp_ilimited; int hp_ilimit_count; + + struct snd_kcontrol *kctl[5]; }; #if IS_REACHABLE(CONFIG_SND_SOC_CS42L43_SDW) diff --git a/sound/soc/codecs/cs530x.c b/sound/soc/codecs/cs530x.c index 25a86a32e936679dcfd13d9428e47618ac60706f..da52afe56c3c625aed26212dac6094e099a3d8e1 100644 --- a/sound/soc/codecs/cs530x.c +++ b/sound/soc/codecs/cs530x.c @@ -129,16 +129,16 @@ volsw_err: static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -1270, 50, 0); -static const char * const cs530x_in_hpf_text[] = { +static const char * const cs530x_in_filter_text[] = { "Min Phase Slow Roll-off", "Min Phase Fast Roll-off", "Linear Phase Slow Roll-off", "Linear Phase Fast Roll-off", }; -static SOC_ENUM_SINGLE_DECL(cs530x_in_hpf_enum, CS530X_IN_FILTER, +static SOC_ENUM_SINGLE_DECL(cs530x_in_filter_enum, CS530X_IN_FILTER, CS530X_IN_FILTER_SHIFT, - cs530x_in_hpf_text); + cs530x_in_filter_text); static const char * const cs530x_in_4ch_sum_text[] = { "None", @@ -189,7 +189,7 @@ SOC_SINGLE_EXT_TLV("IN1 Volume", CS530X_IN_VOL_CTRL1_0, 0, 255, 1, SOC_SINGLE_EXT_TLV("IN2 Volume", CS530X_IN_VOL_CTRL1_1, 0, 255, 1, snd_soc_get_volsw, cs530x_put_volsw_vu, in_vol_tlv), -SOC_ENUM("IN HPF Select", cs530x_in_hpf_enum), +SOC_ENUM("IN DEC Filter Select", cs530x_in_filter_enum), SOC_ENUM("Input Ramp Up", cs530x_ramp_inc_enum), SOC_ENUM("Input Ramp Down", cs530x_ramp_dec_enum), diff --git a/sound/soc/codecs/es8326.c b/sound/soc/codecs/es8326.c index b246694ebb4faa4d7aab7de3c9145eeed9e124b9..be3c79232a3180583ef64843860ad844bd2cc2cd 100644 --- a/sound/soc/codecs/es8326.c +++ b/sound/soc/codecs/es8326.c @@ -843,6 +843,8 @@ static void es8326_jack_detect_handler(struct work_struct *work) es8326_disable_micbias(es8326->component); if (es8326->jack->status & SND_JACK_HEADPHONE) { dev_dbg(comp->dev, "Report hp remove event\n"); + snd_soc_jack_report(es8326->jack, 0, + SND_JACK_BTN_0 | SND_JACK_BTN_1 | SND_JACK_BTN_2); snd_soc_jack_report(es8326->jack, 0, SND_JACK_HEADSET); /* mute adc when mic path switch */ regmap_write(es8326->regmap, ES8326_ADC1_SRC, 0x44); diff --git a/sound/soc/codecs/lpass-va-macro.c b/sound/soc/codecs/lpass-va-macro.c index b852cc7ffad9d9417897acbf79fa64e6a9b58dbe..a62ccd09bacd72d24012a6f88f0aafcbcb4b77e0 100644 --- a/sound/soc/codecs/lpass-va-macro.c +++ b/sound/soc/codecs/lpass-va-macro.c @@ -1472,6 +1472,8 @@ static void va_macro_set_lpass_codec_version(struct va_macro *va) if ((core_id_0 == 0x01) && (core_id_1 == 0x0F)) version = LPASS_CODEC_VERSION_2_0; + if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && core_id_2 == 0x01) + version = LPASS_CODEC_VERSION_2_0; if ((core_id_0 == 0x02) && (core_id_1 == 0x0E)) version = LPASS_CODEC_VERSION_2_1; if ((core_id_0 == 0x02) && (core_id_1 == 0x0F) && (core_id_2 == 0x50 || core_id_2 == 0x51)) diff --git a/sound/soc/codecs/nau8822.c b/sound/soc/codecs/nau8822.c index e1cbaf8a944d8cf94992fbe249743a9c2d3d6259..fd4a96a12060e11f55dad2eb737dceb870dfa2ef 100644 --- a/sound/soc/codecs/nau8822.c +++ b/sound/soc/codecs/nau8822.c @@ -736,7 +736,7 @@ static int nau8822_set_pll(struct snd_soc_dai *dai, int pll_id, int source, return ret; } - dev_info(component->dev, + dev_dbg(component->dev, "pll_int=%x pll_frac=%x mclk_scaler=%x pre_factor=%x\n", pll_param->pll_int, pll_param->pll_frac, pll_param->mclk_scaler, pll_param->pre_factor); diff --git a/sound/soc/codecs/wcd937x-sdw.c b/sound/soc/codecs/wcd937x-sdw.c index 3abc8041406adc24544bb33e23b56162c0c36deb..0c33f7f3dc25c6088ba8922a8e48769955f53400 100644 --- a/sound/soc/codecs/wcd937x-sdw.c +++ b/sound/soc/codecs/wcd937x-sdw.c @@ -1049,7 +1049,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD937X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; @@ -1062,7 +1062,7 @@ static int wcd9370_probe(struct sdw_slave *pdev, /* Start in cache-only until device is enumerated */ regcache_cache_only(wcd->regmap, true); } else { - pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD937X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd937x_dpn_prop; wcd->ch_info = &wcd937x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wcd938x-sdw.c b/sound/soc/codecs/wcd938x-sdw.c index c995bcc59eadb4f2c726267b48044bb3dc6e8c93..7da8a10bd0a93281822a6d315ddff06cc9c7894c 100644 --- a/sound/soc/codecs/wcd938x-sdw.c +++ b/sound/soc/codecs/wcd938x-sdw.c @@ -1252,12 +1252,12 @@ static int wcd9380_probe(struct sdw_slave *pdev, pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD938X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd938x_dpn_prop; wcd->ch_info = &wcd938x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wcd939x-sdw.c b/sound/soc/codecs/wcd939x-sdw.c index 94b1e99a3ca0ecdb16a8dd73b2b07ee54564dd50..fca95777a75af930828e8b31575d6198c47d4639 100644 --- a/sound/soc/codecs/wcd939x-sdw.c +++ b/sound/soc/codecs/wcd939x-sdw.c @@ -1453,12 +1453,12 @@ static int wcd9390_probe(struct sdw_slave *pdev, const struct sdw_device_id *id) pdev->prop.lane_control_support = true; pdev->prop.simple_clk_stop_capable = true; if (wcd->is_tx) { - pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS, 0); + pdev->prop.source_ports = GENMASK(WCD939X_MAX_TX_SWR_PORTS - 1, 0); pdev->prop.src_dpn_prop = wcd939x_tx_dpn_prop; wcd->ch_info = &wcd939x_sdw_tx_ch_info[0]; pdev->prop.wake_capable = true; } else { - pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WCD939X_MAX_RX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wcd939x_rx_dpn_prop; wcd->ch_info = &wcd939x_sdw_rx_ch_info[0]; } diff --git a/sound/soc/codecs/wm_adsp.c b/sound/soc/codecs/wm_adsp.c index 9f8549b34e30254dc9831300b1495e58f2158d5c..e69283195f362ffd928eb006a987ad57dae0b49a 100644 --- a/sound/soc/codecs/wm_adsp.c +++ b/sound/soc/codecs/wm_adsp.c @@ -583,7 +583,7 @@ static void wm_adsp_ctl_work(struct work_struct *work) kfree(kcontrol); } -static int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl) { struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); struct cs_dsp *cs_dsp = &dsp->cs_dsp; @@ -658,6 +658,17 @@ err_ctl: return ret; } +EXPORT_SYMBOL_GPL(wm_adsp_control_add); + +static int wm_adsp_control_add_cb(struct cs_dsp_coeff_ctl *cs_ctl) +{ + struct wm_adsp *dsp = container_of(cs_ctl->dsp, struct wm_adsp, cs_dsp); + + if (dsp->control_add) + return (dsp->control_add)(dsp, cs_ctl); + else + return wm_adsp_control_add(cs_ctl); +} static void wm_adsp_control_remove(struct cs_dsp_coeff_ctl *cs_ctl) { @@ -2072,12 +2083,12 @@ irqreturn_t wm_halo_wdt_expire(int irq, void *data) EXPORT_SYMBOL_GPL(wm_halo_wdt_expire); static const struct cs_dsp_client_ops wm_adsp1_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, }; static const struct cs_dsp_client_ops wm_adsp2_client_ops = { - .control_add = wm_adsp_control_add, + .control_add = wm_adsp_control_add_cb, .control_remove = wm_adsp_control_remove, .pre_run = wm_adsp_pre_run, .post_run = wm_adsp_event_post_run, diff --git a/sound/soc/codecs/wm_adsp.h b/sound/soc/codecs/wm_adsp.h index e53dfcf1f78ff77e5bad89e9e1aef3bb34db242d..edc5b02ae76542378a6ff6cf794209600ef92fbc 100644 --- a/sound/soc/codecs/wm_adsp.h +++ b/sound/soc/codecs/wm_adsp.h @@ -37,6 +37,7 @@ struct wm_adsp { bool wmfw_optional; struct work_struct boot_work; + int (*control_add)(struct wm_adsp *dsp, struct cs_dsp_coeff_ctl *cs_ctl); int (*pre_run)(struct wm_adsp *dsp); bool preloaded; @@ -132,6 +133,8 @@ int wm_adsp_compr_pointer(struct snd_soc_component *component, int wm_adsp_compr_copy(struct snd_soc_component *component, struct snd_compr_stream *stream, char __user *buf, size_t count); + +int wm_adsp_control_add(struct cs_dsp_coeff_ctl *cs_ctl); int wm_adsp_write_ctl(struct wm_adsp *dsp, const char *name, int type, unsigned int alg, void *buf, size_t len); int wm_adsp_read_ctl(struct wm_adsp *dsp, const char *name, int type, diff --git a/sound/soc/codecs/wsa881x.c b/sound/soc/codecs/wsa881x.c index 0478599d0f359f2fbd64bc353e7b179b90adafd2..fb9e92f08d987761181cf5d3bfdbc23534f86483 100644 --- a/sound/soc/codecs/wsa881x.c +++ b/sound/soc/codecs/wsa881x.c @@ -1152,7 +1152,7 @@ static int wsa881x_probe(struct sdw_slave *pdev, wsa881x->sconfig.frame_rate = 48000; wsa881x->sconfig.direction = SDW_DATA_DIR_RX; wsa881x->sconfig.type = SDW_STREAM_PDM; - pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA881X_MAX_SWR_PORTS - 1, 0); pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; pdev->prop.clk_stop_mode1 = true; diff --git a/sound/soc/codecs/wsa883x.c b/sound/soc/codecs/wsa883x.c index d0ab4e2290b6a9f049a95534cbe7673b9c1ae594..3e4fdaa3f44fb10a1d1200b02d54b4d3a3405913 100644 --- a/sound/soc/codecs/wsa883x.c +++ b/sound/soc/codecs/wsa883x.c @@ -1406,7 +1406,7 @@ static int wsa883x_probe(struct sdw_slave *pdev, WSA883X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA883X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; diff --git a/sound/soc/codecs/wsa884x.c b/sound/soc/codecs/wsa884x.c index d17ae17b2938b40ee4bb50bbacaad834e6ff4751..89eb5e03a61704dda9693d31d5e4c02b3eb98855 100644 --- a/sound/soc/codecs/wsa884x.c +++ b/sound/soc/codecs/wsa884x.c @@ -1895,7 +1895,7 @@ static int wsa884x_probe(struct sdw_slave *pdev, WSA884X_MAX_SWR_PORTS)) dev_dbg(dev, "Static Port mapping not specified\n"); - pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS, 0); + pdev->prop.sink_ports = GENMASK(WSA884X_MAX_SWR_PORTS - 1, 0); pdev->prop.simple_clk_stop_capable = true; pdev->prop.sink_dpn_prop = wsa884x_sink_dpn_prop; pdev->prop.scp_int1_mask = SDW_SCP_INT1_BUS_CLASH | SDW_SCP_INT1_PARITY; diff --git a/sound/soc/fsl/fsl_micfil.c b/sound/soc/fsl/fsl_micfil.c index 0d37edb70261cb64a47dc10659155b0e793d1c06..22b240a70ad48e65dedaf59522eb528d16a54ee2 100644 --- a/sound/soc/fsl/fsl_micfil.c +++ b/sound/soc/fsl/fsl_micfil.c @@ -831,7 +831,7 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { {REG_MICFIL_CTRL1, 0x00000000}, {REG_MICFIL_CTRL2, 0x00000000}, {REG_MICFIL_STAT, 0x00000000}, - {REG_MICFIL_FIFO_CTRL, 0x00000007}, + {REG_MICFIL_FIFO_CTRL, 0x0000001F}, {REG_MICFIL_FIFO_STAT, 0x00000000}, {REG_MICFIL_DATACH0, 0x00000000}, {REG_MICFIL_DATACH1, 0x00000000}, @@ -855,6 +855,8 @@ static const struct reg_default fsl_micfil_reg_defaults[] = { static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -872,9 +874,6 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: - case REG_MICFIL_FSYNC_CTRL: - case REG_MICFIL_VERID: - case REG_MICFIL_PARAM: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: @@ -883,6 +882,12 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NDATA: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + case REG_MICFIL_VERID: + case REG_MICFIL_PARAM: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } @@ -890,6 +895,8 @@ static bool fsl_micfil_readable_reg(struct device *dev, unsigned int reg) static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) { + struct fsl_micfil *micfil = dev_get_drvdata(dev); + switch (reg) { case REG_MICFIL_CTRL1: case REG_MICFIL_CTRL2: @@ -899,7 +906,6 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_DC_CTRL: case REG_MICFIL_OUT_CTRL: case REG_MICFIL_OUT_STAT: /* Write 1 to Clear */ - case REG_MICFIL_FSYNC_CTRL: case REG_MICFIL_VAD0_CTRL1: case REG_MICFIL_VAD0_CTRL2: case REG_MICFIL_VAD0_STAT: /* Write 1 to Clear */ @@ -907,6 +913,10 @@ static bool fsl_micfil_writeable_reg(struct device *dev, unsigned int reg) case REG_MICFIL_VAD0_NCONFIG: case REG_MICFIL_VAD0_ZCD: return true; + case REG_MICFIL_FSYNC_CTRL: + if (micfil->soc->use_verid) + return true; + fallthrough; default: return false; } diff --git a/sound/soc/fsl/fsl_micfil.h b/sound/soc/fsl/fsl_micfil.h index c6b902ba0a5317d4ead1ce6ec320d8901f0fb69b..b7798a7cbf2af87b14537a19a2657960e9a421dd 100644 --- a/sound/soc/fsl/fsl_micfil.h +++ b/sound/soc/fsl/fsl_micfil.h @@ -72,7 +72,7 @@ #define MICFIL_STAT_CHXF(ch) BIT(ch) /* MICFIL FIFO Control Register -- REG_MICFIL_FIFO_CTRL 0x10 */ -#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(2, 0) +#define MICFIL_FIFO_CTRL_FIFOWMK GENMASK(4, 0) /* MICFIL FIFO Status Register -- REG_MICFIL_FIFO_STAT 0x14 */ #define MICFIL_FIFO_STAT_FIFOX_OVER(ch) BIT(ch) diff --git a/sound/soc/meson/axg-fifo.c b/sound/soc/meson/axg-fifo.c index 7e6090af720b9a2b569e20f5a25e702f0338e7d8..75909196b7698ab31e92a469dab60ea1b21502e7 100644 --- a/sound/soc/meson/axg-fifo.c +++ b/sound/soc/meson/axg-fifo.c @@ -207,25 +207,18 @@ static irqreturn_t axg_fifo_pcm_irq_block(int irq, void *dev_id) status = FIELD_GET(STATUS1_INT_STS, status); axg_fifo_ack_irq(fifo, status); - /* Use the thread to call period elapsed on nonatomic links */ - if (status & FIFO_INT_COUNT_REPEAT) - return IRQ_WAKE_THREAD; + if (status & ~FIFO_INT_COUNT_REPEAT) + dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", + status); - dev_dbg(axg_fifo_dev(ss), "unexpected irq - STS 0x%02x\n", - status); + if (status & FIFO_INT_COUNT_REPEAT) { + snd_pcm_period_elapsed(ss); + return IRQ_HANDLED; + } return IRQ_NONE; } -static irqreturn_t axg_fifo_pcm_irq_block_thread(int irq, void *dev_id) -{ - struct snd_pcm_substream *ss = dev_id; - - snd_pcm_period_elapsed(ss); - - return IRQ_HANDLED; -} - int axg_fifo_pcm_open(struct snd_soc_component *component, struct snd_pcm_substream *ss) { @@ -251,8 +244,9 @@ int axg_fifo_pcm_open(struct snd_soc_component *component, if (ret) return ret; - ret = request_threaded_irq(fifo->irq, axg_fifo_pcm_irq_block, - axg_fifo_pcm_irq_block_thread, + /* Use the threaded irq handler only with non-atomic links */ + ret = request_threaded_irq(fifo->irq, NULL, + axg_fifo_pcm_irq_block, IRQF_ONESHOT, dev_name(dev), ss); if (ret) return ret; diff --git a/sound/soc/soc-component.c b/sound/soc/soc-component.c index 4d7c2e3c929a969c134aad57bd938c7df004dae1..42f48132191979e5b167ee40e489f4a52db678f6 100644 --- a/sound/soc/soc-component.c +++ b/sound/soc/soc-component.c @@ -236,19 +236,45 @@ int snd_soc_component_force_enable_pin_unlocked( } EXPORT_SYMBOL_GPL(snd_soc_component_force_enable_pin_unlocked); -int snd_soc_component_notify_control(struct snd_soc_component *component, - const char * const ctl) +static void soc_get_kcontrol_name(struct snd_soc_component *component, + char *buf, int size, const char * const ctl) { - char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; - struct snd_kcontrol *kctl; - /* When updating, change also snd_soc_dapm_widget_name_cmp() */ if (component->name_prefix) - snprintf(name, ARRAY_SIZE(name), "%s %s", component->name_prefix, ctl); + snprintf(buf, size, "%s %s", component->name_prefix, ctl); else - snprintf(name, ARRAY_SIZE(name), "%s", ctl); + snprintf(buf, size, "%s", ctl); +} + +struct snd_kcontrol *snd_soc_component_get_kcontrol(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol); + +struct snd_kcontrol * +snd_soc_component_get_kcontrol_locked(struct snd_soc_component *component, + const char * const ctl) +{ + char name[SNDRV_CTL_ELEM_ID_NAME_MAXLEN]; + + soc_get_kcontrol_name(component, name, ARRAY_SIZE(name), ctl); + + return snd_soc_card_get_kcontrol_locked(component->card, name); +} +EXPORT_SYMBOL_GPL(snd_soc_component_get_kcontrol_locked); + +int snd_soc_component_notify_control(struct snd_soc_component *component, + const char * const ctl) +{ + struct snd_kcontrol *kctl; - kctl = snd_soc_card_get_kcontrol(component->card, name); + kctl = snd_soc_component_get_kcontrol(component, ctl); if (!kctl) return soc_component_ret(component, -EINVAL); diff --git a/sound/soc/sof/mediatek/mt8195/mt8195.c b/sound/soc/sof/mediatek/mt8195/mt8195.c index 24ae1d4959be59a9d683310248f420eab5b55ef2..1c6e035fd313f7c08bb31b49bac967e04f387fe1 100644 --- a/sound/soc/sof/mediatek/mt8195/mt8195.c +++ b/sound/soc/sof/mediatek/mt8195/mt8195.c @@ -573,7 +573,7 @@ static const struct snd_sof_dsp_ops sof_mt8195_ops = { static struct snd_sof_of_mach sof_mt8195_machs[] = { { .compatible = "google,tomato", - .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682-dts.tplg" + .sof_tplg_filename = "sof-mt8195-mt6359-rt1019-rt5682.tplg" }, { .compatible = "mediatek,mt8195", .sof_tplg_filename = "sof-mt8195.tplg" diff --git a/sound/soc/sti/sti_uniperif.c b/sound/soc/sti/sti_uniperif.c index ba824f14a39cf39a9164cb0806094947a297f4dc..a7956e5a4ee5d1a2d0248ec1a90532fed022c13c 100644 --- a/sound/soc/sti/sti_uniperif.c +++ b/sound/soc/sti/sti_uniperif.c @@ -352,7 +352,7 @@ static int sti_uniperiph_resume(struct snd_soc_component *component) return ret; } -static int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai) { struct sti_uniperiph_data *priv = snd_soc_dai_get_drvdata(dai); struct sti_uniperiph_dai *dai_data = &priv->dai_data; diff --git a/sound/soc/sti/uniperif.h b/sound/soc/sti/uniperif.h index 2a5de328501c192333187365794f49b09044dd93..74e51f0ff85c841dad9583b596587329baf6365a 100644 --- a/sound/soc/sti/uniperif.h +++ b/sound/soc/sti/uniperif.h @@ -1380,6 +1380,7 @@ int uni_reader_init(struct platform_device *pdev, struct uniperif *reader); /* common */ +int sti_uniperiph_dai_probe(struct snd_soc_dai *dai); int sti_uniperiph_dai_set_fmt(struct snd_soc_dai *dai, unsigned int fmt); diff --git a/sound/soc/sti/uniperif_player.c b/sound/soc/sti/uniperif_player.c index dd9013c4766491af0980e74020c4dcfb4d4d17a2..6d1ce030963c628935b5762de6ae7d0c7da9f91c 100644 --- a/sound/soc/sti/uniperif_player.c +++ b/sound/soc/sti/uniperif_player.c @@ -1038,6 +1038,7 @@ static const struct snd_soc_dai_ops uni_player_dai_ops = { .startup = uni_player_startup, .shutdown = uni_player_shutdown, .prepare = uni_player_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_player_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, diff --git a/sound/soc/sti/uniperif_reader.c b/sound/soc/sti/uniperif_reader.c index 065c5f0d1f5f00c65715ccde98360816ddbf67a9..05ea2b794eb92563d4a53ac66791ae6780796b6a 100644 --- a/sound/soc/sti/uniperif_reader.c +++ b/sound/soc/sti/uniperif_reader.c @@ -401,6 +401,7 @@ static const struct snd_soc_dai_ops uni_reader_dai_ops = { .startup = uni_reader_startup, .shutdown = uni_reader_shutdown, .prepare = uni_reader_prepare, + .probe = sti_uniperiph_dai_probe, .trigger = uni_reader_trigger, .hw_params = sti_uniperiph_dai_hw_params, .set_fmt = sti_uniperiph_dai_set_fmt, diff --git a/sound/usb/line6/driver.c b/sound/usb/line6/driver.c index f4437015d43a7500b809a303f175b211662d500f..9df49a880b750d639c5d23abef2cbf0a07e1f9c8 100644 --- a/sound/usb/line6/driver.c +++ b/sound/usb/line6/driver.c @@ -286,12 +286,14 @@ static void line6_data_received(struct urb *urb) { struct usb_line6 *line6 = (struct usb_line6 *)urb->context; struct midi_buffer *mb = &line6->line6midi->midibuf_in; + unsigned long flags; int done; if (urb->status == -ESHUTDOWN) return; if (line6->properties->capabilities & LINE6_CAP_CONTROL_MIDI) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_write(mb, urb->transfer_buffer, urb->actual_length); @@ -300,12 +302,15 @@ static void line6_data_received(struct urb *urb) dev_dbg(line6->ifcdev, "%d %d buffer overflow - message skipped\n", done, urb->actual_length); } + spin_unlock_irqrestore(&line6->line6midi->lock, flags); for (;;) { + spin_lock_irqsave(&line6->line6midi->lock, flags); done = line6_midibuf_read(mb, line6->buffer_message, LINE6_MIDI_MESSAGE_MAXLEN, LINE6_MIDIBUF_READ_RX); + spin_unlock_irqrestore(&line6->line6midi->lock, flags); if (done <= 0) break; diff --git a/sound/usb/quirks-table.h b/sound/usb/quirks-table.h index 73abc38a5400689423497d266c587371923aaa66..aaa6a515d0f8a4b22691c0862a26841811804f26 100644 --- a/sound/usb/quirks-table.h +++ b/sound/usb/quirks-table.h @@ -273,6 +273,7 @@ YAMAHA_DEVICE(0x105a, NULL), YAMAHA_DEVICE(0x105b, NULL), YAMAHA_DEVICE(0x105c, NULL), YAMAHA_DEVICE(0x105d, NULL), +YAMAHA_DEVICE(0x1718, "P-125"), { USB_DEVICE(0x0499, 0x1503), .driver_info = (unsigned long) & (const struct snd_usb_audio_quirk) { @@ -2594,6 +2595,10 @@ YAMAHA_DEVICE(0x7010, "UB99"), } }, +/* Stanton ScratchAmp */ +{ USB_DEVICE(0x103d, 0x0100) }, +{ USB_DEVICE(0x103d, 0x0101) }, + /* Novation EMS devices */ { USB_DEVICE_VENDOR_SPEC(0x1235, 0x0001), diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c index ea063a14cdd8fed24e6aa0a33287bc20e5b680b3..e7b68c67852e92ef851d7f820dfd6c522e2c0e24 100644 --- a/sound/usb/quirks.c +++ b/sound/usb/quirks.c @@ -2221,6 +2221,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = { QUIRK_FLAG_GENERIC_IMPLICIT_FB), DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */ QUIRK_FLAG_GENERIC_IMPLICIT_FB), + DEVICE_FLG(0x2d95, 0x8021, /* VIVO USB-C-XE710 HEADSET */ + QUIRK_FLAG_CTL_MSG_DELAY_1M), DEVICE_FLG(0x30be, 0x0101, /* Schiit Hel */ QUIRK_FLAG_IGNORE_CTL_ERROR), DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */ diff --git a/sound/usb/stream.c b/sound/usb/stream.c index d5409f3879455961182713d24327836219e73457..e14c725acebf2c1dbf69f56b421081a3beb1b1cb 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -244,8 +244,8 @@ static struct snd_pcm_chmap_elem *convert_chmap(int channels, unsigned int bits, SNDRV_CHMAP_FR, /* right front */ SNDRV_CHMAP_FC, /* center front */ SNDRV_CHMAP_LFE, /* LFE */ - SNDRV_CHMAP_SL, /* left surround */ - SNDRV_CHMAP_SR, /* right surround */ + SNDRV_CHMAP_RL, /* left surround */ + SNDRV_CHMAP_RR, /* right surround */ SNDRV_CHMAP_FLC, /* left of center */ SNDRV_CHMAP_FRC, /* right of center */ SNDRV_CHMAP_RC, /* surround */ diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 7b32b99023a21d3abc9f86e7b6acd7dc861793b2..5fd7caea441936246ce9ddb7b5d7bdf04b30d14b 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -86,9 +86,14 @@ #define ARM_CPU_PART_CORTEX_X2 0xD48 #define ARM_CPU_PART_NEOVERSE_N2 0xD49 #define ARM_CPU_PART_CORTEX_A78C 0xD4B +#define ARM_CPU_PART_CORTEX_X1C 0xD4C +#define ARM_CPU_PART_CORTEX_X3 0xD4E #define ARM_CPU_PART_NEOVERSE_V2 0xD4F +#define ARM_CPU_PART_CORTEX_A720 0xD81 #define ARM_CPU_PART_CORTEX_X4 0xD82 #define ARM_CPU_PART_NEOVERSE_V3 0xD84 +#define ARM_CPU_PART_CORTEX_X925 0xD85 +#define ARM_CPU_PART_CORTEX_A725 0xD87 #define APM_CPU_PART_XGENE 0x000 #define APM_CPU_VAR_POTENZA 0x00 @@ -162,9 +167,14 @@ #define MIDR_CORTEX_X2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X2) #define MIDR_NEOVERSE_N2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_N2) #define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) +#define MIDR_CORTEX_X1C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1C) +#define MIDR_CORTEX_X3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X3) #define MIDR_NEOVERSE_V2 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V2) +#define MIDR_CORTEX_A720 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A720) #define MIDR_CORTEX_X4 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X4) #define MIDR_NEOVERSE_V3 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V3) +#define MIDR_CORTEX_X925 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X925) +#define MIDR_CORTEX_A725 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A725) #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index 1691297a766a9c1a4df9384c4ff02ecd8ce21b92..eaeda001784ebb6f21c4e64bad3b632c2b292031 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -645,6 +645,9 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) #define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) #define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) +#define KVM_REG_PPC_DEXCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc6) +#define KVM_REG_PPC_HASHKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc7) +#define KVM_REG_PPC_HASHPKEYR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc8) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 3c7434329661c66e7c34283f0a3f2c59a87f8044..dd4682857c12083c099b1aee6c47df77287bb179 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -18,170 +18,170 @@ /* * Note: If the comment begins with a quoted string, that string is used - * in /proc/cpuinfo instead of the macro name. If the string is "", - * this feature bit is not displayed in /proc/cpuinfo at all. + * in /proc/cpuinfo instead of the macro name. Otherwise, this feature + * bit is not displayed in /proc/cpuinfo at all. * * When adding new features here that depend on other features, * please update the table in kernel/cpu/cpuid-deps.c as well. */ /* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */ -#define X86_FEATURE_FPU ( 0*32+ 0) /* Onboard FPU */ -#define X86_FEATURE_VME ( 0*32+ 1) /* Virtual Mode Extensions */ -#define X86_FEATURE_DE ( 0*32+ 2) /* Debugging Extensions */ -#define X86_FEATURE_PSE ( 0*32+ 3) /* Page Size Extensions */ -#define X86_FEATURE_TSC ( 0*32+ 4) /* Time Stamp Counter */ -#define X86_FEATURE_MSR ( 0*32+ 5) /* Model-Specific Registers */ -#define X86_FEATURE_PAE ( 0*32+ 6) /* Physical Address Extensions */ -#define X86_FEATURE_MCE ( 0*32+ 7) /* Machine Check Exception */ -#define X86_FEATURE_CX8 ( 0*32+ 8) /* CMPXCHG8 instruction */ -#define X86_FEATURE_APIC ( 0*32+ 9) /* Onboard APIC */ -#define X86_FEATURE_SEP ( 0*32+11) /* SYSENTER/SYSEXIT */ -#define X86_FEATURE_MTRR ( 0*32+12) /* Memory Type Range Registers */ -#define X86_FEATURE_PGE ( 0*32+13) /* Page Global Enable */ -#define X86_FEATURE_MCA ( 0*32+14) /* Machine Check Architecture */ -#define X86_FEATURE_CMOV ( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ -#define X86_FEATURE_PAT ( 0*32+16) /* Page Attribute Table */ -#define X86_FEATURE_PSE36 ( 0*32+17) /* 36-bit PSEs */ -#define X86_FEATURE_PN ( 0*32+18) /* Processor serial number */ -#define X86_FEATURE_CLFLUSH ( 0*32+19) /* CLFLUSH instruction */ +#define X86_FEATURE_FPU ( 0*32+ 0) /* "fpu" Onboard FPU */ +#define X86_FEATURE_VME ( 0*32+ 1) /* "vme" Virtual Mode Extensions */ +#define X86_FEATURE_DE ( 0*32+ 2) /* "de" Debugging Extensions */ +#define X86_FEATURE_PSE ( 0*32+ 3) /* "pse" Page Size Extensions */ +#define X86_FEATURE_TSC ( 0*32+ 4) /* "tsc" Time Stamp Counter */ +#define X86_FEATURE_MSR ( 0*32+ 5) /* "msr" Model-Specific Registers */ +#define X86_FEATURE_PAE ( 0*32+ 6) /* "pae" Physical Address Extensions */ +#define X86_FEATURE_MCE ( 0*32+ 7) /* "mce" Machine Check Exception */ +#define X86_FEATURE_CX8 ( 0*32+ 8) /* "cx8" CMPXCHG8 instruction */ +#define X86_FEATURE_APIC ( 0*32+ 9) /* "apic" Onboard APIC */ +#define X86_FEATURE_SEP ( 0*32+11) /* "sep" SYSENTER/SYSEXIT */ +#define X86_FEATURE_MTRR ( 0*32+12) /* "mtrr" Memory Type Range Registers */ +#define X86_FEATURE_PGE ( 0*32+13) /* "pge" Page Global Enable */ +#define X86_FEATURE_MCA ( 0*32+14) /* "mca" Machine Check Architecture */ +#define X86_FEATURE_CMOV ( 0*32+15) /* "cmov" CMOV instructions (plus FCMOVcc, FCOMI with FPU) */ +#define X86_FEATURE_PAT ( 0*32+16) /* "pat" Page Attribute Table */ +#define X86_FEATURE_PSE36 ( 0*32+17) /* "pse36" 36-bit PSEs */ +#define X86_FEATURE_PN ( 0*32+18) /* "pn" Processor serial number */ +#define X86_FEATURE_CLFLUSH ( 0*32+19) /* "clflush" CLFLUSH instruction */ #define X86_FEATURE_DS ( 0*32+21) /* "dts" Debug Store */ -#define X86_FEATURE_ACPI ( 0*32+22) /* ACPI via MSR */ -#define X86_FEATURE_MMX ( 0*32+23) /* Multimedia Extensions */ -#define X86_FEATURE_FXSR ( 0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */ +#define X86_FEATURE_ACPI ( 0*32+22) /* "acpi" ACPI via MSR */ +#define X86_FEATURE_MMX ( 0*32+23) /* "mmx" Multimedia Extensions */ +#define X86_FEATURE_FXSR ( 0*32+24) /* "fxsr" FXSAVE/FXRSTOR, CR4.OSFXSR */ #define X86_FEATURE_XMM ( 0*32+25) /* "sse" */ #define X86_FEATURE_XMM2 ( 0*32+26) /* "sse2" */ #define X86_FEATURE_SELFSNOOP ( 0*32+27) /* "ss" CPU self snoop */ -#define X86_FEATURE_HT ( 0*32+28) /* Hyper-Threading */ +#define X86_FEATURE_HT ( 0*32+28) /* "ht" Hyper-Threading */ #define X86_FEATURE_ACC ( 0*32+29) /* "tm" Automatic clock control */ -#define X86_FEATURE_IA64 ( 0*32+30) /* IA-64 processor */ -#define X86_FEATURE_PBE ( 0*32+31) /* Pending Break Enable */ +#define X86_FEATURE_IA64 ( 0*32+30) /* "ia64" IA-64 processor */ +#define X86_FEATURE_PBE ( 0*32+31) /* "pbe" Pending Break Enable */ /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ /* Don't duplicate feature flags which are redundant with Intel! */ -#define X86_FEATURE_SYSCALL ( 1*32+11) /* SYSCALL/SYSRET */ -#define X86_FEATURE_MP ( 1*32+19) /* MP Capable */ -#define X86_FEATURE_NX ( 1*32+20) /* Execute Disable */ -#define X86_FEATURE_MMXEXT ( 1*32+22) /* AMD MMX extensions */ -#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* FXSAVE/FXRSTOR optimizations */ +#define X86_FEATURE_SYSCALL ( 1*32+11) /* "syscall" SYSCALL/SYSRET */ +#define X86_FEATURE_MP ( 1*32+19) /* "mp" MP Capable */ +#define X86_FEATURE_NX ( 1*32+20) /* "nx" Execute Disable */ +#define X86_FEATURE_MMXEXT ( 1*32+22) /* "mmxext" AMD MMX extensions */ +#define X86_FEATURE_FXSR_OPT ( 1*32+25) /* "fxsr_opt" FXSAVE/FXRSTOR optimizations */ #define X86_FEATURE_GBPAGES ( 1*32+26) /* "pdpe1gb" GB pages */ -#define X86_FEATURE_RDTSCP ( 1*32+27) /* RDTSCP */ -#define X86_FEATURE_LM ( 1*32+29) /* Long Mode (x86-64, 64-bit support) */ -#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* AMD 3DNow extensions */ -#define X86_FEATURE_3DNOW ( 1*32+31) /* 3DNow */ +#define X86_FEATURE_RDTSCP ( 1*32+27) /* "rdtscp" RDTSCP */ +#define X86_FEATURE_LM ( 1*32+29) /* "lm" Long Mode (x86-64, 64-bit support) */ +#define X86_FEATURE_3DNOWEXT ( 1*32+30) /* "3dnowext" AMD 3DNow extensions */ +#define X86_FEATURE_3DNOW ( 1*32+31) /* "3dnow" 3DNow */ /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */ -#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* CPU in recovery mode */ -#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* Longrun power control */ -#define X86_FEATURE_LRTI ( 2*32+ 3) /* LongRun table interface */ +#define X86_FEATURE_RECOVERY ( 2*32+ 0) /* "recovery" CPU in recovery mode */ +#define X86_FEATURE_LONGRUN ( 2*32+ 1) /* "longrun" Longrun power control */ +#define X86_FEATURE_LRTI ( 2*32+ 3) /* "lrti" LongRun table interface */ /* Other features, Linux-defined mapping, word 3 */ /* This range is used for feature bits which conflict or are synthesized */ -#define X86_FEATURE_CXMMX ( 3*32+ 0) /* Cyrix MMX extensions */ -#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* AMD K6 nonstandard MTRRs */ -#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */ -#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* Centaur MCRs (= MTRRs) */ -#define X86_FEATURE_K8 ( 3*32+ 4) /* "" Opteron, Athlon64 */ -#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* "" CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* "" P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* "" P4 */ -#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* TSC ticks at a constant rate */ -#define X86_FEATURE_UP ( 3*32+ 9) /* SMP kernel running on UP */ -#define X86_FEATURE_ART ( 3*32+10) /* Always running timer (ART) */ -#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* Intel Architectural PerfMon */ -#define X86_FEATURE_PEBS ( 3*32+12) /* Precise-Event Based Sampling */ -#define X86_FEATURE_BTS ( 3*32+13) /* Branch Trace Store */ -#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ -#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ -#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ -#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ -#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ -#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ -#define X86_FEATURE_ALWAYS ( 3*32+21) /* "" Always-present feature */ -#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* CPU topology enum extensions */ -#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ -#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ -#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */ -#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */ -#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ -#define X86_FEATURE_RAPL ( 3*32+29) /* AMD/Hygon RAPL interface */ -#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */ -#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */ +#define X86_FEATURE_CXMMX ( 3*32+ 0) /* "cxmmx" Cyrix MMX extensions */ +#define X86_FEATURE_K6_MTRR ( 3*32+ 1) /* "k6_mtrr" AMD K6 nonstandard MTRRs */ +#define X86_FEATURE_CYRIX_ARR ( 3*32+ 2) /* "cyrix_arr" Cyrix ARRs (= MTRRs) */ +#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ +#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ +#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ +#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ +#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ +#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ +#define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ +#define X86_FEATURE_ARCH_PERFMON ( 3*32+11) /* "arch_perfmon" Intel Architectural PerfMon */ +#define X86_FEATURE_PEBS ( 3*32+12) /* "pebs" Precise-Event Based Sampling */ +#define X86_FEATURE_BTS ( 3*32+13) /* "bts" Branch Trace Store */ +#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* syscall in IA32 userspace */ +#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* sysenter in IA32 userspace */ +#define X86_FEATURE_REP_GOOD ( 3*32+16) /* "rep_good" REP microcode works well */ +#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* "amd_lbr_v2" AMD Last Branch Record Extension Version 2 */ +#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* Clear CPU buffers using VERW */ +#define X86_FEATURE_ACC_POWER ( 3*32+19) /* "acc_power" AMD Accumulated Power Mechanism */ +#define X86_FEATURE_NOPL ( 3*32+20) /* "nopl" The NOPL (0F 1F) instructions */ +#define X86_FEATURE_ALWAYS ( 3*32+21) /* Always-present feature */ +#define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* "xtopology" CPU topology enum extensions */ +#define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* "tsc_reliable" TSC is known to be reliable */ +#define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* "nonstop_tsc" TSC does not stop in C states */ +#define X86_FEATURE_CPUID ( 3*32+25) /* "cpuid" CPU has CPUID instruction itself */ +#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* "extd_apicid" Extended APICID (8 bits) */ +#define X86_FEATURE_AMD_DCM ( 3*32+27) /* "amd_dcm" AMD multi-node processor */ +#define X86_FEATURE_APERFMPERF ( 3*32+28) /* "aperfmperf" P-State hardware coordination feedback capability (APERF/MPERF MSRs) */ +#define X86_FEATURE_RAPL ( 3*32+29) /* "rapl" AMD/Hygon RAPL interface */ +#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* "nonstop_tsc_s3" TSC doesn't stop in S3 state */ +#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* "tsc_known_freq" TSC has known frequency */ /* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */ #define X86_FEATURE_XMM3 ( 4*32+ 0) /* "pni" SSE-3 */ -#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* PCLMULQDQ instruction */ -#define X86_FEATURE_DTES64 ( 4*32+ 2) /* 64-bit Debug Store */ +#define X86_FEATURE_PCLMULQDQ ( 4*32+ 1) /* "pclmulqdq" PCLMULQDQ instruction */ +#define X86_FEATURE_DTES64 ( 4*32+ 2) /* "dtes64" 64-bit Debug Store */ #define X86_FEATURE_MWAIT ( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */ #define X86_FEATURE_DSCPL ( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */ -#define X86_FEATURE_VMX ( 4*32+ 5) /* Hardware virtualization */ -#define X86_FEATURE_SMX ( 4*32+ 6) /* Safer Mode eXtensions */ -#define X86_FEATURE_EST ( 4*32+ 7) /* Enhanced SpeedStep */ -#define X86_FEATURE_TM2 ( 4*32+ 8) /* Thermal Monitor 2 */ -#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* Supplemental SSE-3 */ -#define X86_FEATURE_CID ( 4*32+10) /* Context ID */ -#define X86_FEATURE_SDBG ( 4*32+11) /* Silicon Debug */ -#define X86_FEATURE_FMA ( 4*32+12) /* Fused multiply-add */ -#define X86_FEATURE_CX16 ( 4*32+13) /* CMPXCHG16B instruction */ -#define X86_FEATURE_XTPR ( 4*32+14) /* Send Task Priority Messages */ -#define X86_FEATURE_PDCM ( 4*32+15) /* Perf/Debug Capabilities MSR */ -#define X86_FEATURE_PCID ( 4*32+17) /* Process Context Identifiers */ -#define X86_FEATURE_DCA ( 4*32+18) /* Direct Cache Access */ +#define X86_FEATURE_VMX ( 4*32+ 5) /* "vmx" Hardware virtualization */ +#define X86_FEATURE_SMX ( 4*32+ 6) /* "smx" Safer Mode eXtensions */ +#define X86_FEATURE_EST ( 4*32+ 7) /* "est" Enhanced SpeedStep */ +#define X86_FEATURE_TM2 ( 4*32+ 8) /* "tm2" Thermal Monitor 2 */ +#define X86_FEATURE_SSSE3 ( 4*32+ 9) /* "ssse3" Supplemental SSE-3 */ +#define X86_FEATURE_CID ( 4*32+10) /* "cid" Context ID */ +#define X86_FEATURE_SDBG ( 4*32+11) /* "sdbg" Silicon Debug */ +#define X86_FEATURE_FMA ( 4*32+12) /* "fma" Fused multiply-add */ +#define X86_FEATURE_CX16 ( 4*32+13) /* "cx16" CMPXCHG16B instruction */ +#define X86_FEATURE_XTPR ( 4*32+14) /* "xtpr" Send Task Priority Messages */ +#define X86_FEATURE_PDCM ( 4*32+15) /* "pdcm" Perf/Debug Capabilities MSR */ +#define X86_FEATURE_PCID ( 4*32+17) /* "pcid" Process Context Identifiers */ +#define X86_FEATURE_DCA ( 4*32+18) /* "dca" Direct Cache Access */ #define X86_FEATURE_XMM4_1 ( 4*32+19) /* "sse4_1" SSE-4.1 */ #define X86_FEATURE_XMM4_2 ( 4*32+20) /* "sse4_2" SSE-4.2 */ -#define X86_FEATURE_X2APIC ( 4*32+21) /* X2APIC */ -#define X86_FEATURE_MOVBE ( 4*32+22) /* MOVBE instruction */ -#define X86_FEATURE_POPCNT ( 4*32+23) /* POPCNT instruction */ -#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* TSC deadline timer */ -#define X86_FEATURE_AES ( 4*32+25) /* AES instructions */ -#define X86_FEATURE_XSAVE ( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */ -#define X86_FEATURE_OSXSAVE ( 4*32+27) /* "" XSAVE instruction enabled in the OS */ -#define X86_FEATURE_AVX ( 4*32+28) /* Advanced Vector Extensions */ -#define X86_FEATURE_F16C ( 4*32+29) /* 16-bit FP conversions */ -#define X86_FEATURE_RDRAND ( 4*32+30) /* RDRAND instruction */ -#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* Running on a hypervisor */ +#define X86_FEATURE_X2APIC ( 4*32+21) /* "x2apic" X2APIC */ +#define X86_FEATURE_MOVBE ( 4*32+22) /* "movbe" MOVBE instruction */ +#define X86_FEATURE_POPCNT ( 4*32+23) /* "popcnt" POPCNT instruction */ +#define X86_FEATURE_TSC_DEADLINE_TIMER ( 4*32+24) /* "tsc_deadline_timer" TSC deadline timer */ +#define X86_FEATURE_AES ( 4*32+25) /* "aes" AES instructions */ +#define X86_FEATURE_XSAVE ( 4*32+26) /* "xsave" XSAVE/XRSTOR/XSETBV/XGETBV instructions */ +#define X86_FEATURE_OSXSAVE ( 4*32+27) /* XSAVE instruction enabled in the OS */ +#define X86_FEATURE_AVX ( 4*32+28) /* "avx" Advanced Vector Extensions */ +#define X86_FEATURE_F16C ( 4*32+29) /* "f16c" 16-bit FP conversions */ +#define X86_FEATURE_RDRAND ( 4*32+30) /* "rdrand" RDRAND instruction */ +#define X86_FEATURE_HYPERVISOR ( 4*32+31) /* "hypervisor" Running on a hypervisor */ /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ #define X86_FEATURE_XSTORE ( 5*32+ 2) /* "rng" RNG present (xstore) */ #define X86_FEATURE_XSTORE_EN ( 5*32+ 3) /* "rng_en" RNG enabled */ #define X86_FEATURE_XCRYPT ( 5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */ #define X86_FEATURE_XCRYPT_EN ( 5*32+ 7) /* "ace_en" on-CPU crypto enabled */ -#define X86_FEATURE_ACE2 ( 5*32+ 8) /* Advanced Cryptography Engine v2 */ -#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* ACE v2 enabled */ -#define X86_FEATURE_PHE ( 5*32+10) /* PadLock Hash Engine */ -#define X86_FEATURE_PHE_EN ( 5*32+11) /* PHE enabled */ -#define X86_FEATURE_PMM ( 5*32+12) /* PadLock Montgomery Multiplier */ -#define X86_FEATURE_PMM_EN ( 5*32+13) /* PMM enabled */ +#define X86_FEATURE_ACE2 ( 5*32+ 8) /* "ace2" Advanced Cryptography Engine v2 */ +#define X86_FEATURE_ACE2_EN ( 5*32+ 9) /* "ace2_en" ACE v2 enabled */ +#define X86_FEATURE_PHE ( 5*32+10) /* "phe" PadLock Hash Engine */ +#define X86_FEATURE_PHE_EN ( 5*32+11) /* "phe_en" PHE enabled */ +#define X86_FEATURE_PMM ( 5*32+12) /* "pmm" PadLock Montgomery Multiplier */ +#define X86_FEATURE_PMM_EN ( 5*32+13) /* "pmm_en" PMM enabled */ /* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */ -#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVM ( 6*32+ 2) /* Secure Virtual Machine */ -#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* Extended APIC space */ -#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* CR8 in 32-bit mode */ -#define X86_FEATURE_ABM ( 6*32+ 5) /* Advanced bit manipulation */ -#define X86_FEATURE_SSE4A ( 6*32+ 6) /* SSE-4A */ -#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* Misaligned SSE mode */ -#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* 3DNow prefetch instructions */ -#define X86_FEATURE_OSVW ( 6*32+ 9) /* OS Visible Workaround */ -#define X86_FEATURE_IBS ( 6*32+10) /* Instruction Based Sampling */ -#define X86_FEATURE_XOP ( 6*32+11) /* extended AVX instructions */ -#define X86_FEATURE_SKINIT ( 6*32+12) /* SKINIT/STGI instructions */ -#define X86_FEATURE_WDT ( 6*32+13) /* Watchdog timer */ -#define X86_FEATURE_LWP ( 6*32+15) /* Light Weight Profiling */ -#define X86_FEATURE_FMA4 ( 6*32+16) /* 4 operands MAC instructions */ -#define X86_FEATURE_TCE ( 6*32+17) /* Translation Cache Extension */ -#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* NodeId MSR */ -#define X86_FEATURE_TBM ( 6*32+21) /* Trailing Bit Manipulations */ -#define X86_FEATURE_TOPOEXT ( 6*32+22) /* Topology extensions CPUID leafs */ -#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* Core performance counter extensions */ -#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* NB performance counter extensions */ -#define X86_FEATURE_BPEXT ( 6*32+26) /* Data breakpoint extension */ -#define X86_FEATURE_PTSC ( 6*32+27) /* Performance time-stamp counter */ -#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* Last Level Cache performance counter extensions */ -#define X86_FEATURE_MWAITX ( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */ +#define X86_FEATURE_LAHF_LM ( 6*32+ 0) /* "lahf_lm" LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY ( 6*32+ 1) /* "cmp_legacy" If yes HyperThreading not valid */ +#define X86_FEATURE_SVM ( 6*32+ 2) /* "svm" Secure Virtual Machine */ +#define X86_FEATURE_EXTAPIC ( 6*32+ 3) /* "extapic" Extended APIC space */ +#define X86_FEATURE_CR8_LEGACY ( 6*32+ 4) /* "cr8_legacy" CR8 in 32-bit mode */ +#define X86_FEATURE_ABM ( 6*32+ 5) /* "abm" Advanced bit manipulation */ +#define X86_FEATURE_SSE4A ( 6*32+ 6) /* "sse4a" SSE-4A */ +#define X86_FEATURE_MISALIGNSSE ( 6*32+ 7) /* "misalignsse" Misaligned SSE mode */ +#define X86_FEATURE_3DNOWPREFETCH ( 6*32+ 8) /* "3dnowprefetch" 3DNow prefetch instructions */ +#define X86_FEATURE_OSVW ( 6*32+ 9) /* "osvw" OS Visible Workaround */ +#define X86_FEATURE_IBS ( 6*32+10) /* "ibs" Instruction Based Sampling */ +#define X86_FEATURE_XOP ( 6*32+11) /* "xop" Extended AVX instructions */ +#define X86_FEATURE_SKINIT ( 6*32+12) /* "skinit" SKINIT/STGI instructions */ +#define X86_FEATURE_WDT ( 6*32+13) /* "wdt" Watchdog timer */ +#define X86_FEATURE_LWP ( 6*32+15) /* "lwp" Light Weight Profiling */ +#define X86_FEATURE_FMA4 ( 6*32+16) /* "fma4" 4 operands MAC instructions */ +#define X86_FEATURE_TCE ( 6*32+17) /* "tce" Translation Cache Extension */ +#define X86_FEATURE_NODEID_MSR ( 6*32+19) /* "nodeid_msr" NodeId MSR */ +#define X86_FEATURE_TBM ( 6*32+21) /* "tbm" Trailing Bit Manipulations */ +#define X86_FEATURE_TOPOEXT ( 6*32+22) /* "topoext" Topology extensions CPUID leafs */ +#define X86_FEATURE_PERFCTR_CORE ( 6*32+23) /* "perfctr_core" Core performance counter extensions */ +#define X86_FEATURE_PERFCTR_NB ( 6*32+24) /* "perfctr_nb" NB performance counter extensions */ +#define X86_FEATURE_BPEXT ( 6*32+26) /* "bpext" Data breakpoint extension */ +#define X86_FEATURE_PTSC ( 6*32+27) /* "ptsc" Performance time-stamp counter */ +#define X86_FEATURE_PERFCTR_LLC ( 6*32+28) /* "perfctr_llc" Last Level Cache performance counter extensions */ +#define X86_FEATURE_MWAITX ( 6*32+29) /* "mwaitx" MWAIT extension (MONITORX/MWAITX instructions) */ /* * Auxiliary flags: Linux defined - For features scattered in various @@ -189,93 +189,93 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */ -#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */ -#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ -#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ -#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ -#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ -#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* Platform supports being a TDX host */ -#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ -#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ -#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */ -#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ -#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */ -#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */ -#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ -#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ -#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ -#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ -#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ -#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ -#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ -#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ -#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ -#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_ZEN ( 7*32+28) /* "" Generic flag for all Zen and newer */ -#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */ -#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */ -#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */ +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* "ring3mwait" Ring 3 MONITOR/MWAIT instructions */ +#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* "cpuid_fault" Intel CPUID faulting */ +#define X86_FEATURE_CPB ( 7*32+ 2) /* "cpb" AMD Core Performance Boost */ +#define X86_FEATURE_EPB ( 7*32+ 3) /* "epb" IA32_ENERGY_PERF_BIAS support */ +#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* "cat_l3" Cache Allocation Technology L3 */ +#define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* "cat_l2" Cache Allocation Technology L2 */ +#define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* "cdp_l3" Code and Data Prioritization L3 */ +#define X86_FEATURE_TDX_HOST_PLATFORM ( 7*32+ 7) /* "tdx_host_platform" Platform supports being a TDX host */ +#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* "hw_pstate" AMD HW-PState */ +#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* "proc_feedback" AMD ProcFeedbackInterface */ +#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* Use compacted XSTATE (XSAVES or XSAVEC) */ +#define X86_FEATURE_PTI ( 7*32+11) /* "pti" Kernel Page Table Isolation enabled */ +#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* Set/clear IBRS on kernel entry/exit */ +#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* Fill RSB on VM-Exit */ +#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* "intel_ppin" Intel Processor Inventory Number */ +#define X86_FEATURE_CDP_L2 ( 7*32+15) /* "cdp_l2" Code and Data Prioritization L2 */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* "ssbd" Speculative Store Bypass Disable */ +#define X86_FEATURE_MBA ( 7*32+18) /* "mba" Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ +#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* "perfmon_v2" AMD Performance Monitoring Version 2 */ +#define X86_FEATURE_USE_IBPB ( 7*32+21) /* Indirect Branch Prediction Barrier enabled */ +#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* "ibrs" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* "ibpb" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* "stibp" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* Generic flag for all Zen and newer */ +#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* L1TF workaround PTE inversion */ +#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* "ibrs_enhanced" Enhanced IBRS */ +#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* MSR IA32_FEAT_CTL configured */ /* Virtualization flags: Linux defined, word 8 */ -#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ -#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* Intel FlexPriority */ -#define X86_FEATURE_EPT ( 8*32+ 2) /* Intel Extended Page Table */ -#define X86_FEATURE_VPID ( 8*32+ 3) /* Intel Virtual Processor ID */ +#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* "tpr_shadow" Intel TPR Shadow */ +#define X86_FEATURE_FLEXPRIORITY ( 8*32+ 1) /* "flexpriority" Intel FlexPriority */ +#define X86_FEATURE_EPT ( 8*32+ 2) /* "ept" Intel Extended Page Table */ +#define X86_FEATURE_VPID ( 8*32+ 3) /* "vpid" Intel Virtual Processor ID */ -#define X86_FEATURE_VMMCALL ( 8*32+15) /* Prefer VMMCALL to VMCALL */ -#define X86_FEATURE_XENPV ( 8*32+16) /* "" Xen paravirtual guest */ -#define X86_FEATURE_EPT_AD ( 8*32+17) /* Intel Extended Page Table access-dirty bit */ -#define X86_FEATURE_VMCALL ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */ -#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */ -#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */ -#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */ -#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */ +#define X86_FEATURE_VMMCALL ( 8*32+15) /* "vmmcall" Prefer VMMCALL to VMCALL */ +#define X86_FEATURE_XENPV ( 8*32+16) /* Xen paravirtual guest */ +#define X86_FEATURE_EPT_AD ( 8*32+17) /* "ept_ad" Intel Extended Page Table access-dirty bit */ +#define X86_FEATURE_VMCALL ( 8*32+18) /* Hypervisor supports the VMCALL instruction */ +#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* VMware prefers VMMCALL hypercall instruction */ +#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* PV unlock function */ +#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* PV vcpu_is_preempted function */ +#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* "tdx_guest" Intel Trust Domain Extensions Guest */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */ -#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ -#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* TSC adjustment MSR 0x3B */ -#define X86_FEATURE_SGX ( 9*32+ 2) /* Software Guard Extensions */ -#define X86_FEATURE_BMI1 ( 9*32+ 3) /* 1st group bit manipulation extensions */ -#define X86_FEATURE_HLE ( 9*32+ 4) /* Hardware Lock Elision */ -#define X86_FEATURE_AVX2 ( 9*32+ 5) /* AVX2 instructions */ -#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* "" FPU data pointer updated only on x87 exceptions */ -#define X86_FEATURE_SMEP ( 9*32+ 7) /* Supervisor Mode Execution Protection */ -#define X86_FEATURE_BMI2 ( 9*32+ 8) /* 2nd group bit manipulation extensions */ -#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */ -#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */ -#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */ -#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */ -#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* "" Zero out FPU CS and FPU DS */ -#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */ -#define X86_FEATURE_RDT_A ( 9*32+15) /* Resource Director Technology Allocation */ -#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */ -#define X86_FEATURE_AVX512DQ ( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */ -#define X86_FEATURE_RDSEED ( 9*32+18) /* RDSEED instruction */ -#define X86_FEATURE_ADX ( 9*32+19) /* ADCX and ADOX instructions */ -#define X86_FEATURE_SMAP ( 9*32+20) /* Supervisor Mode Access Prevention */ -#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */ -#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* CLFLUSHOPT instruction */ -#define X86_FEATURE_CLWB ( 9*32+24) /* CLWB instruction */ -#define X86_FEATURE_INTEL_PT ( 9*32+25) /* Intel Processor Trace */ -#define X86_FEATURE_AVX512PF ( 9*32+26) /* AVX-512 Prefetch */ -#define X86_FEATURE_AVX512ER ( 9*32+27) /* AVX-512 Exponential and Reciprocal */ -#define X86_FEATURE_AVX512CD ( 9*32+28) /* AVX-512 Conflict Detection */ -#define X86_FEATURE_SHA_NI ( 9*32+29) /* SHA1/SHA256 Instruction Extensions */ -#define X86_FEATURE_AVX512BW ( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */ -#define X86_FEATURE_AVX512VL ( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */ +#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* "fsgsbase" RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/ +#define X86_FEATURE_TSC_ADJUST ( 9*32+ 1) /* "tsc_adjust" TSC adjustment MSR 0x3B */ +#define X86_FEATURE_SGX ( 9*32+ 2) /* "sgx" Software Guard Extensions */ +#define X86_FEATURE_BMI1 ( 9*32+ 3) /* "bmi1" 1st group bit manipulation extensions */ +#define X86_FEATURE_HLE ( 9*32+ 4) /* "hle" Hardware Lock Elision */ +#define X86_FEATURE_AVX2 ( 9*32+ 5) /* "avx2" AVX2 instructions */ +#define X86_FEATURE_FDP_EXCPTN_ONLY ( 9*32+ 6) /* FPU data pointer updated only on x87 exceptions */ +#define X86_FEATURE_SMEP ( 9*32+ 7) /* "smep" Supervisor Mode Execution Protection */ +#define X86_FEATURE_BMI2 ( 9*32+ 8) /* "bmi2" 2nd group bit manipulation extensions */ +#define X86_FEATURE_ERMS ( 9*32+ 9) /* "erms" Enhanced REP MOVSB/STOSB instructions */ +#define X86_FEATURE_INVPCID ( 9*32+10) /* "invpcid" Invalidate Processor Context ID */ +#define X86_FEATURE_RTM ( 9*32+11) /* "rtm" Restricted Transactional Memory */ +#define X86_FEATURE_CQM ( 9*32+12) /* "cqm" Cache QoS Monitoring */ +#define X86_FEATURE_ZERO_FCS_FDS ( 9*32+13) /* Zero out FPU CS and FPU DS */ +#define X86_FEATURE_MPX ( 9*32+14) /* "mpx" Memory Protection Extension */ +#define X86_FEATURE_RDT_A ( 9*32+15) /* "rdt_a" Resource Director Technology Allocation */ +#define X86_FEATURE_AVX512F ( 9*32+16) /* "avx512f" AVX-512 Foundation */ +#define X86_FEATURE_AVX512DQ ( 9*32+17) /* "avx512dq" AVX-512 DQ (Double/Quad granular) Instructions */ +#define X86_FEATURE_RDSEED ( 9*32+18) /* "rdseed" RDSEED instruction */ +#define X86_FEATURE_ADX ( 9*32+19) /* "adx" ADCX and ADOX instructions */ +#define X86_FEATURE_SMAP ( 9*32+20) /* "smap" Supervisor Mode Access Prevention */ +#define X86_FEATURE_AVX512IFMA ( 9*32+21) /* "avx512ifma" AVX-512 Integer Fused Multiply-Add instructions */ +#define X86_FEATURE_CLFLUSHOPT ( 9*32+23) /* "clflushopt" CLFLUSHOPT instruction */ +#define X86_FEATURE_CLWB ( 9*32+24) /* "clwb" CLWB instruction */ +#define X86_FEATURE_INTEL_PT ( 9*32+25) /* "intel_pt" Intel Processor Trace */ +#define X86_FEATURE_AVX512PF ( 9*32+26) /* "avx512pf" AVX-512 Prefetch */ +#define X86_FEATURE_AVX512ER ( 9*32+27) /* "avx512er" AVX-512 Exponential and Reciprocal */ +#define X86_FEATURE_AVX512CD ( 9*32+28) /* "avx512cd" AVX-512 Conflict Detection */ +#define X86_FEATURE_SHA_NI ( 9*32+29) /* "sha_ni" SHA1/SHA256 Instruction Extensions */ +#define X86_FEATURE_AVX512BW ( 9*32+30) /* "avx512bw" AVX-512 BW (Byte/Word granular) Instructions */ +#define X86_FEATURE_AVX512VL ( 9*32+31) /* "avx512vl" AVX-512 VL (128/256 Vector Length) Extensions */ /* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */ -#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* XSAVEOPT instruction */ -#define X86_FEATURE_XSAVEC (10*32+ 1) /* XSAVEC instruction */ -#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 instruction */ -#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS instructions */ -#define X86_FEATURE_XFD (10*32+ 4) /* "" eXtended Feature Disabling */ +#define X86_FEATURE_XSAVEOPT (10*32+ 0) /* "xsaveopt" XSAVEOPT instruction */ +#define X86_FEATURE_XSAVEC (10*32+ 1) /* "xsavec" XSAVEC instruction */ +#define X86_FEATURE_XGETBV1 (10*32+ 2) /* "xgetbv1" XGETBV with ECX = 1 instruction */ +#define X86_FEATURE_XSAVES (10*32+ 3) /* "xsaves" XSAVES/XRSTORS instructions */ +#define X86_FEATURE_XFD (10*32+ 4) /* eXtended Feature Disabling */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -283,181 +283,183 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_CQM_LLC (11*32+ 0) /* LLC QoS if 1 */ -#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* LLC occupancy monitoring */ -#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* LLC Total MBM monitoring */ -#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */ -#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */ -#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */ -#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */ -#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */ -#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */ -#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */ -#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */ -#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */ -#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */ -#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */ -#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */ -#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ -#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ -#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ -#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ -#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ -#define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ -#define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ -#define X86_FEATURE_USER_SHSTK (11*32+23) /* Shadow stack support for user mode applications */ -#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ -#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ -#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ -#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* "" IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ -#define X86_FEATURE_ZEN2 (11*32+28) /* "" CPU based on Zen2 microarchitecture */ -#define X86_FEATURE_ZEN3 (11*32+29) /* "" CPU based on Zen3 microarchitecture */ -#define X86_FEATURE_ZEN4 (11*32+30) /* "" CPU based on Zen4 microarchitecture */ -#define X86_FEATURE_ZEN1 (11*32+31) /* "" CPU based on Zen1 microarchitecture */ +#define X86_FEATURE_CQM_LLC (11*32+ 0) /* "cqm_llc" LLC QoS if 1 */ +#define X86_FEATURE_CQM_OCCUP_LLC (11*32+ 1) /* "cqm_occup_llc" LLC occupancy monitoring */ +#define X86_FEATURE_CQM_MBM_TOTAL (11*32+ 2) /* "cqm_mbm_total" LLC Total MBM monitoring */ +#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* "cqm_mbm_local" LLC Local MBM monitoring */ +#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* LFENCE in user entry SWAPGS path */ +#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* LFENCE in kernel entry SWAPGS path */ +#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* "split_lock_detect" #AC for split lock */ +#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* Per-thread Memory Bandwidth Allocation */ +#define X86_FEATURE_SGX1 (11*32+ 8) /* Basic SGX */ +#define X86_FEATURE_SGX2 (11*32+ 9) /* SGX Enclave Dynamic Memory Management (EDMM) */ +#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* Issue an IBPB on kernel entry */ +#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* RET prediction control */ +#define X86_FEATURE_RETPOLINE (11*32+12) /* Generic Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* Use LFENCE for Spectre variant 2 */ +#define X86_FEATURE_RETHUNK (11*32+14) /* Use REturn THUNK */ +#define X86_FEATURE_UNRET (11*32+15) /* AMD BTB untrain return */ +#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* Use IBPB during runtime firmware calls */ +#define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* Fill RSB on VM exit when EIBRS is enabled */ +#define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* SGX EDECCSSA user leaf function */ +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* Call depth tracking for RSB stuffing */ +#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */ +#define X86_FEATURE_SMBA (11*32+21) /* Slow Memory Bandwidth Allocation */ +#define X86_FEATURE_BMEC (11*32+22) /* Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_USER_SHSTK (11*32+23) /* "user_shstk" Shadow stack support for user mode applications */ +#define X86_FEATURE_SRSO (11*32+24) /* AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* Issue an IBPB only on VMEXIT */ +#define X86_FEATURE_APIC_MSRS_FENCE (11*32+27) /* IA32_TSC_DEADLINE and X2APIC MSRs need fencing */ +#define X86_FEATURE_ZEN2 (11*32+28) /* CPU based on Zen2 microarchitecture */ +#define X86_FEATURE_ZEN3 (11*32+29) /* CPU based on Zen3 microarchitecture */ +#define X86_FEATURE_ZEN4 (11*32+30) /* CPU based on Zen4 microarchitecture */ +#define X86_FEATURE_ZEN1 (11*32+31) /* CPU based on Zen1 microarchitecture */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ -#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ -#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ -#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* "" Intel Architectural PerfMon Extension */ -#define X86_FEATURE_FZRM (12*32+10) /* "" Fast zero-length REP MOVSB */ -#define X86_FEATURE_FSRS (12*32+11) /* "" Fast short REP STOSB */ -#define X86_FEATURE_FSRC (12*32+12) /* "" Fast short REP {CMPSB,SCASB} */ -#define X86_FEATURE_FRED (12*32+17) /* Flexible Return and Event Delivery */ -#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ -#define X86_FEATURE_WRMSRNS (12*32+19) /* "" Non-serializing WRMSR */ -#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ -#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ -#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */ +#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* "avx_vnni" AVX VNNI instructions */ +#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* "avx512_bf16" AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* CMPccXADD instructions */ +#define X86_FEATURE_ARCH_PERFMON_EXT (12*32+ 8) /* Intel Architectural PerfMon Extension */ +#define X86_FEATURE_FZRM (12*32+10) /* Fast zero-length REP MOVSB */ +#define X86_FEATURE_FSRS (12*32+11) /* Fast short REP STOSB */ +#define X86_FEATURE_FSRC (12*32+12) /* Fast short REP {CMPSB,SCASB} */ +#define X86_FEATURE_FRED (12*32+17) /* "fred" Flexible Return and Event Delivery */ +#define X86_FEATURE_LKGS (12*32+18) /* Load "kernel" (userspace) GS */ +#define X86_FEATURE_WRMSRNS (12*32+19) /* Non-serializing WRMSR */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* Support for VPMADD52[H,L]UQ */ +#define X86_FEATURE_LAM (12*32+26) /* "lam" Linear Address Masking */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ -#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ -#define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ -#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_RDPRU (13*32+ 4) /* Read processor register at user level */ -#define X86_FEATURE_WBNOINVD (13*32+ 9) /* WBNOINVD instruction */ -#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ -#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ -#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */ -#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */ -#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ -#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */ -#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */ -#define X86_FEATURE_AMD_PSFD (13*32+28) /* "" Predictive Store Forwarding Disable */ -#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */ -#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */ +#define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ +#define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ +#define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ +#define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ +#define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ +#define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ +#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* Speculative Store Bypass is fixed in hardware. */ +#define X86_FEATURE_CPPC (13*32+27) /* "cppc" Collaborative Processor Performance Control */ +#define X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define X86_FEATURE_BTC_NO (13*32+29) /* Not vulnerable to Branch Type Confusion */ +#define X86_FEATURE_BRS (13*32+31) /* "brs" Branch Sampling available */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ -#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ -#define X86_FEATURE_IDA (14*32+ 1) /* Intel Dynamic Acceleration */ -#define X86_FEATURE_ARAT (14*32+ 2) /* Always Running APIC Timer */ -#define X86_FEATURE_PLN (14*32+ 4) /* Intel Power Limit Notification */ -#define X86_FEATURE_PTS (14*32+ 6) /* Intel Package Thermal Status */ -#define X86_FEATURE_HWP (14*32+ 7) /* Intel Hardware P-states */ -#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* HWP Notification */ -#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ -#define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ -#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ -#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ +#define X86_FEATURE_DTHERM (14*32+ 0) /* "dtherm" Digital Thermal Sensor */ +#define X86_FEATURE_IDA (14*32+ 1) /* "ida" Intel Dynamic Acceleration */ +#define X86_FEATURE_ARAT (14*32+ 2) /* "arat" Always Running APIC Timer */ +#define X86_FEATURE_PLN (14*32+ 4) /* "pln" Intel Power Limit Notification */ +#define X86_FEATURE_PTS (14*32+ 6) /* "pts" Intel Package Thermal Status */ +#define X86_FEATURE_HWP (14*32+ 7) /* "hwp" Intel Hardware P-states */ +#define X86_FEATURE_HWP_NOTIFY (14*32+ 8) /* "hwp_notify" HWP Notification */ +#define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* "hwp_act_window" HWP Activity Window */ +#define X86_FEATURE_HWP_EPP (14*32+10) /* "hwp_epp" HWP Energy Perf. Preference */ +#define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* "hwp_pkg_req" HWP Package Level Request */ +#define X86_FEATURE_HWP_HIGHEST_PERF_CHANGE (14*32+15) /* HWP Highest perf change */ +#define X86_FEATURE_HFI (14*32+19) /* "hfi" Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ -#define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ -#define X86_FEATURE_LBRV (15*32+ 1) /* LBR Virtualization support */ +#define X86_FEATURE_NPT (15*32+ 0) /* "npt" Nested Page Table support */ +#define X86_FEATURE_LBRV (15*32+ 1) /* "lbrv" LBR Virtualization support */ #define X86_FEATURE_SVML (15*32+ 2) /* "svm_lock" SVM locking MSR */ #define X86_FEATURE_NRIPS (15*32+ 3) /* "nrip_save" SVM next_rip save */ #define X86_FEATURE_TSCRATEMSR (15*32+ 4) /* "tsc_scale" TSC scaling support */ #define X86_FEATURE_VMCBCLEAN (15*32+ 5) /* "vmcb_clean" VMCB clean bits support */ -#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* flush-by-ASID support */ -#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* Decode Assists support */ -#define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */ -#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */ -#define X86_FEATURE_AVIC (15*32+13) /* Virtual Interrupt Controller */ -#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* Virtual VMSAVE VMLOAD */ -#define X86_FEATURE_VGIF (15*32+16) /* Virtual GIF */ -#define X86_FEATURE_X2AVIC (15*32+18) /* Virtual x2apic */ -#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* Virtual SPEC_CTRL */ -#define X86_FEATURE_VNMI (15*32+25) /* Virtual NMI */ -#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* "" SVME addr check */ +#define X86_FEATURE_FLUSHBYASID (15*32+ 6) /* "flushbyasid" Flush-by-ASID support */ +#define X86_FEATURE_DECODEASSISTS (15*32+ 7) /* "decodeassists" Decode Assists support */ +#define X86_FEATURE_PAUSEFILTER (15*32+10) /* "pausefilter" Filtered pause intercept */ +#define X86_FEATURE_PFTHRESHOLD (15*32+12) /* "pfthreshold" Pause filter threshold */ +#define X86_FEATURE_AVIC (15*32+13) /* "avic" Virtual Interrupt Controller */ +#define X86_FEATURE_V_VMSAVE_VMLOAD (15*32+15) /* "v_vmsave_vmload" Virtual VMSAVE VMLOAD */ +#define X86_FEATURE_VGIF (15*32+16) /* "vgif" Virtual GIF */ +#define X86_FEATURE_X2AVIC (15*32+18) /* "x2avic" Virtual x2apic */ +#define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ +#define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ +#define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ -#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ -#define X86_FEATURE_UMIP (16*32+ 2) /* User Mode Instruction Protection */ -#define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ -#define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ -#define X86_FEATURE_WAITPKG (16*32+ 5) /* UMONITOR/UMWAIT/TPAUSE Instructions */ -#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */ -#define X86_FEATURE_SHSTK (16*32+ 7) /* "" Shadow stack */ -#define X86_FEATURE_GFNI (16*32+ 8) /* Galois Field New Instructions */ -#define X86_FEATURE_VAES (16*32+ 9) /* Vector AES */ -#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* Carry-Less Multiplication Double Quadword */ -#define X86_FEATURE_AVX512_VNNI (16*32+11) /* Vector Neural Network Instructions */ -#define X86_FEATURE_AVX512_BITALG (16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ -#define X86_FEATURE_TME (16*32+13) /* Intel Total Memory Encryption */ -#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ -#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ -#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* Bus Lock detect */ -#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ -#define X86_FEATURE_MOVDIRI (16*32+27) /* MOVDIRI instruction */ -#define X86_FEATURE_MOVDIR64B (16*32+28) /* MOVDIR64B instruction */ -#define X86_FEATURE_ENQCMD (16*32+29) /* ENQCMD and ENQCMDS instructions */ -#define X86_FEATURE_SGX_LC (16*32+30) /* Software Guard Extensions Launch Control */ +#define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ +#define X86_FEATURE_UMIP (16*32+ 2) /* "umip" User Mode Instruction Protection */ +#define X86_FEATURE_PKU (16*32+ 3) /* "pku" Protection Keys for Userspace */ +#define X86_FEATURE_OSPKE (16*32+ 4) /* "ospke" OS Protection Keys Enable */ +#define X86_FEATURE_WAITPKG (16*32+ 5) /* "waitpkg" UMONITOR/UMWAIT/TPAUSE Instructions */ +#define X86_FEATURE_AVX512_VBMI2 (16*32+ 6) /* "avx512_vbmi2" Additional AVX512 Vector Bit Manipulation Instructions */ +#define X86_FEATURE_SHSTK (16*32+ 7) /* Shadow stack */ +#define X86_FEATURE_GFNI (16*32+ 8) /* "gfni" Galois Field New Instructions */ +#define X86_FEATURE_VAES (16*32+ 9) /* "vaes" Vector AES */ +#define X86_FEATURE_VPCLMULQDQ (16*32+10) /* "vpclmulqdq" Carry-Less Multiplication Double Quadword */ +#define X86_FEATURE_AVX512_VNNI (16*32+11) /* "avx512_vnni" Vector Neural Network Instructions */ +#define X86_FEATURE_AVX512_BITALG (16*32+12) /* "avx512_bitalg" Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */ +#define X86_FEATURE_TME (16*32+13) /* "tme" Intel Total Memory Encryption */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* "avx512_vpopcntdq" POPCNT for vectors of DW/QW */ +#define X86_FEATURE_LA57 (16*32+16) /* "la57" 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* "rdpid" RDPID instruction */ +#define X86_FEATURE_BUS_LOCK_DETECT (16*32+24) /* "bus_lock_detect" Bus Lock detect */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* "cldemote" CLDEMOTE instruction */ +#define X86_FEATURE_MOVDIRI (16*32+27) /* "movdiri" MOVDIRI instruction */ +#define X86_FEATURE_MOVDIR64B (16*32+28) /* "movdir64b" MOVDIR64B instruction */ +#define X86_FEATURE_ENQCMD (16*32+29) /* "enqcmd" ENQCMD and ENQCMDS instructions */ +#define X86_FEATURE_SGX_LC (16*32+30) /* "sgx_lc" Software Guard Extensions Launch Control */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ -#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ -#define X86_FEATURE_SUCCOR (17*32+ 1) /* Uncorrectable error containment and recovery */ -#define X86_FEATURE_SMCA (17*32+ 3) /* Scalable MCA */ +#define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* "overflow_recov" MCA overflow recovery support */ +#define X86_FEATURE_SUCCOR (17*32+ 1) /* "succor" Uncorrectable error containment and recovery */ +#define X86_FEATURE_SMCA (17*32+ 3) /* "smca" Scalable MCA */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */ -#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */ -#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */ -#define X86_FEATURE_FSRM (18*32+ 4) /* Fast Short Rep Mov */ -#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* AVX-512 Intersect for D/Q */ -#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */ -#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ -#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* "" RTM transaction always aborts */ -#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ -#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ -#define X86_FEATURE_HYBRID_CPU (18*32+15) /* "" This part has CPUs of more than one type */ -#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ -#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ -#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ -#define X86_FEATURE_IBT (18*32+20) /* Indirect Branch Tracking */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ -#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ -#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ -#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ -#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ -#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */ -#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ +#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* "avx512_4vnniw" AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* "avx512_4fmaps" AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_FSRM (18*32+ 4) /* "fsrm" Fast Short Rep Mov */ +#define X86_FEATURE_AVX512_VP2INTERSECT (18*32+ 8) /* "avx512_vp2intersect" AVX-512 Intersect for D/Q */ +#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* SRBDS mitigation MSR available */ +#define X86_FEATURE_MD_CLEAR (18*32+10) /* "md_clear" VERW clears CPU buffers */ +#define X86_FEATURE_RTM_ALWAYS_ABORT (18*32+11) /* RTM transaction always aborts */ +#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* TSX_FORCE_ABORT */ +#define X86_FEATURE_SERIALIZE (18*32+14) /* "serialize" SERIALIZE instruction */ +#define X86_FEATURE_HYBRID_CPU (18*32+15) /* This part has CPUs of more than one type */ +#define X86_FEATURE_TSXLDTRK (18*32+16) /* "tsxldtrk" TSX Suspend Load Address Tracking */ +#define X86_FEATURE_PCONFIG (18*32+18) /* "pconfig" Intel PCONFIG */ +#define X86_FEATURE_ARCH_LBR (18*32+19) /* "arch_lbr" Intel ARCH LBR */ +#define X86_FEATURE_IBT (18*32+20) /* "ibt" Indirect Branch Tracking */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* "amx_bf16" AMX bf16 Support */ +#define X86_FEATURE_AVX512_FP16 (18*32+23) /* "avx512_fp16" AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* "amx_tile" AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* "amx_int8" AMX int8 Support */ +#define X86_FEATURE_SPEC_CTRL (18*32+26) /* Speculation Control (IBRS + IBPB) */ +#define X86_FEATURE_INTEL_STIBP (18*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_FLUSH_L1D (18*32+28) /* "flush_l1d" Flush L1D cache */ +#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* "arch_capabilities" IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* IA32_CORE_CAPABILITIES MSR */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */ -#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* AMD Secure Encrypted Virtualization - Secure Nested Paging */ -#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ -#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* "" No Nested Data Breakpoints */ -#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* "" WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ -#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* "" LFENCE always serializing / synchronizes RDTSC */ -#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* "" Null Selector Clears Base */ -#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ -#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ +#define X86_FEATURE_WRMSR_XX_BASE_NS (20*32+ 1) /* WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing */ +#define X86_FEATURE_LFENCE_RDTSC (20*32+ 2) /* LFENCE always serializing / synchronizes RDTSC */ +#define X86_FEATURE_NULL_SEL_CLR_BASE (20*32+ 6) /* Null Selector Clears Base */ +#define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ +#define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ -#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ -#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ -#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ +#define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -465,59 +467,60 @@ * * Reuse free bits when adding new feature flags! */ -#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ -#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ -#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ -#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* "amd_lbr_pmc_freeze" AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_FAST_CPPC (21*32 + 5) /* AMD Fast CPPC */ /* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) -#define X86_BUG_F00F X86_BUG(0) /* Intel F00F */ -#define X86_BUG_FDIV X86_BUG(1) /* FPU FDIV */ -#define X86_BUG_COMA X86_BUG(2) /* Cyrix 6x86 coma */ +#define X86_BUG_F00F X86_BUG(0) /* "f00f" Intel F00F */ +#define X86_BUG_FDIV X86_BUG(1) /* "fdiv" FPU FDIV */ +#define X86_BUG_COMA X86_BUG(2) /* "coma" Cyrix 6x86 coma */ #define X86_BUG_AMD_TLB_MMATCH X86_BUG(3) /* "tlb_mmatch" AMD Erratum 383 */ #define X86_BUG_AMD_APIC_C1E X86_BUG(4) /* "apic_c1e" AMD Erratum 400 */ -#define X86_BUG_11AP X86_BUG(5) /* Bad local APIC aka 11AP */ -#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* FXSAVE leaks FOP/FIP/FOP */ -#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* AAI65, CLFLUSH required before MONITOR */ -#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* SYSRET doesn't fix up SS attrs */ +#define X86_BUG_11AP X86_BUG(5) /* "11ap" Bad local APIC aka 11AP */ +#define X86_BUG_FXSAVE_LEAK X86_BUG(6) /* "fxsave_leak" FXSAVE leaks FOP/FIP/FOP */ +#define X86_BUG_CLFLUSH_MONITOR X86_BUG(7) /* "clflush_monitor" AAI65, CLFLUSH required before MONITOR */ +#define X86_BUG_SYSRET_SS_ATTRS X86_BUG(8) /* "sysret_ss_attrs" SYSRET doesn't fix up SS attrs */ #ifdef CONFIG_X86_32 /* * 64-bit kernels don't use X86_BUG_ESPFIX. Make the define conditional * to avoid confusion. */ -#define X86_BUG_ESPFIX X86_BUG(9) /* "" IRET to 16-bit SS corrupts ESP/RSP high bits */ +#define X86_BUG_ESPFIX X86_BUG(9) /* IRET to 16-bit SS corrupts ESP/RSP high bits */ #endif -#define X86_BUG_NULL_SEG X86_BUG(10) /* Nulling a selector preserves the base */ -#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ -#define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ -#define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ -#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ -#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ -#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ -#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ -#define X86_BUG_L1TF X86_BUG(18) /* CPU is affected by L1 Terminal Fault */ -#define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ -#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ -#define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ -#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ -#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ -#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */ -#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* CPU is too old and its MMIO Stale Data status is unknown */ -#define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ -#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ -#define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ -#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ -#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* CPU may incur #MC if non-TD software does partial write to TDX private memory */ +#define X86_BUG_NULL_SEG X86_BUG(10) /* "null_seg" Nulling a selector preserves the base */ +#define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* "swapgs_fence" SWAPGS without input dep on GS */ +#define X86_BUG_MONITOR X86_BUG(12) /* "monitor" IPI required to wake up remote CPU */ +#define X86_BUG_AMD_E400 X86_BUG(13) /* "amd_e400" CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* "cpu_meltdown" CPU is affected by meltdown attack and needs kernel page table isolation */ +#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* "spectre_v1" CPU is affected by Spectre variant 1 attack with conditional branches */ +#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* "spectre_v2" CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* "spec_store_bypass" CPU is affected by speculative store bypass attack */ +#define X86_BUG_L1TF X86_BUG(18) /* "l1tf" CPU is affected by L1 Terminal Fault */ +#define X86_BUG_MDS X86_BUG(19) /* "mds" CPU is affected by Microarchitectural data sampling */ +#define X86_BUG_MSBDS_ONLY X86_BUG(20) /* "msbds_only" CPU is only affected by the MSDBS variant of BUG_MDS */ +#define X86_BUG_SWAPGS X86_BUG(21) /* "swapgs" CPU is affected by speculation through SWAPGS */ +#define X86_BUG_TAA X86_BUG(22) /* "taa" CPU is affected by TSX Async Abort(TAA) */ +#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ +#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ +#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ +#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ +#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ +#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* "gds" CPU is affected by Gather Data Sampling */ +#define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e022e6eb766c64050aedcefc5b8962c2049184c6..82c6a4d350e09e6d8f98f11189cc5cb8f5e58a7c 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -566,6 +566,12 @@ #define MSR_RELOAD_PMC0 0x000014c1 #define MSR_RELOAD_FIXED_CTR0 0x00001309 +/* V6 PMON MSR range */ +#define MSR_IA32_PMC_V6_GP0_CTR 0x1900 +#define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_STEP 4 + /* KeyID partitioning between MKTME and TDX */ #define MSR_IA32_MKTME_KEYID_PARTITIONING 0x00000087 @@ -660,6 +666,8 @@ #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_SVSM_CAA 0xc001f000 + /* AMD Collaborative Processor Performance Control MSRs */ #define MSR_AMD_CPPC_CAP1 0xc00102b0 #define MSR_AMD_CPPC_ENABLE 0xc00102b1 @@ -781,6 +789,8 @@ #define MSR_K7_HWCR_IRPERF_EN BIT_ULL(MSR_K7_HWCR_IRPERF_EN_BIT) #define MSR_K7_FID_VID_CTL 0xc0010041 #define MSR_K7_FID_VID_STATUS 0xc0010042 +#define MSR_K7_HWCR_CPB_DIS_BIT 25 +#define MSR_K7_HWCR_CPB_DIS BIT_ULL(MSR_K7_HWCR_CPB_DIS_BIT) /* K6 MSRs */ #define MSR_K6_WHCR 0xc0000082 @@ -1164,6 +1174,7 @@ #define MSR_IA32_QM_CTR 0xc8e #define MSR_IA32_PQR_ASSOC 0xc8f #define MSR_IA32_L3_CBM_BASE 0xc90 +#define MSR_RMID_SNC_CONFIG 0xca0 #define MSR_IA32_L2_CBM_BASE 0xd10 #define MSR_IA32_MBA_THRTL_BASE 0xd50 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 9fae1b73b529caf53c5c7dd1823fd7a3f320637c..bf57a824f72281218a7c145e44587f3845a50aad 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -106,6 +106,7 @@ struct kvm_ioapic_state { #define KVM_RUN_X86_SMM (1 << 0) #define KVM_RUN_X86_BUS_LOCK (1 << 1) +#define KVM_RUN_X86_GUEST_MODE (1 << 2) /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { @@ -697,6 +698,11 @@ enum sev_cmd_id { /* Second time is the charm; improved versions of the above ioctls. */ KVM_SEV_INIT2, + /* SNP-specific commands */ + KVM_SEV_SNP_LAUNCH_START = 100, + KVM_SEV_SNP_LAUNCH_UPDATE, + KVM_SEV_SNP_LAUNCH_FINISH, + KVM_SEV_NR_MAX, }; @@ -824,6 +830,48 @@ struct kvm_sev_receive_update_data { __u32 pad2; }; +struct kvm_sev_snp_launch_start { + __u64 policy; + __u8 gosvw[16]; + __u16 flags; + __u8 pad0[6]; + __u64 pad1[4]; +}; + +/* Kept in sync with firmware values for simplicity. */ +#define KVM_SEV_SNP_PAGE_TYPE_NORMAL 0x1 +#define KVM_SEV_SNP_PAGE_TYPE_ZERO 0x3 +#define KVM_SEV_SNP_PAGE_TYPE_UNMEASURED 0x4 +#define KVM_SEV_SNP_PAGE_TYPE_SECRETS 0x5 +#define KVM_SEV_SNP_PAGE_TYPE_CPUID 0x6 + +struct kvm_sev_snp_launch_update { + __u64 gfn_start; + __u64 uaddr; + __u64 len; + __u8 type; + __u8 pad0; + __u16 flags; + __u32 pad1; + __u64 pad2[4]; +}; + +#define KVM_SEV_SNP_ID_BLOCK_SIZE 96 +#define KVM_SEV_SNP_ID_AUTH_SIZE 4096 +#define KVM_SEV_SNP_FINISH_DATA_SIZE 32 + +struct kvm_sev_snp_launch_finish { + __u64 id_block_uaddr; + __u64 id_auth_uaddr; + __u8 id_block_en; + __u8 auth_key_en; + __u8 vcek_disabled; + __u8 host_data[KVM_SEV_SNP_FINISH_DATA_SIZE]; + __u8 pad0[3]; + __u16 flags; + __u64 pad1[4]; +}; + #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) #define KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK (1ULL << 1) @@ -874,5 +922,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SW_PROTECTED_VM 1 #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 +#define KVM_X86_SNP_VM 4 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 80e1df482337dfe064d809966a309cecd0930070..1814b413fd5783d295a7c85bf67f27835467a4ce 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -115,6 +115,7 @@ #define SVM_VMGEXIT_AP_CREATE_ON_INIT 0 #define SVM_VMGEXIT_AP_CREATE 1 #define SVM_VMGEXIT_AP_DESTROY 2 +#define SVM_VMGEXIT_SNP_RUN_VMPL 0x80000018 #define SVM_VMGEXIT_HV_FEATURES 0x8000fffd #define SVM_VMGEXIT_TERM_REQUEST 0x8000fffe #define SVM_VMGEXIT_TERM_REASON(reason_set, reason_code) \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 489cbed7e82a7d59aacc5923d9c769a400788ab2..12796808f07a8c0d21d1ebdeddba8d6cf688c25a 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -82,7 +82,30 @@ FILES= \ FILES := $(addprefix $(OUTPUT),$(FILES)) -PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, are required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + endif + endif +endif all: $(FILES) @@ -147,7 +170,17 @@ $(OUTPUT)test-libopencsd.bin: DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) -DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif $(OUTPUT)test-dwarf.bin: @@ -178,27 +211,27 @@ $(OUTPUT)test-numa_num_possible_cpus.bin: $(BUILD) -lnuma $(OUTPUT)test-libunwind.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-debug-frame.bin: - $(BUILD) -lelf + $(BUILD) -lelf -llzma $(OUTPUT)test-libunwind-x86.bin: - $(BUILD) -lelf -lunwind-x86 + $(BUILD) -lelf -llzma -lunwind-x86 $(OUTPUT)test-libunwind-x86_64.bin: - $(BUILD) -lelf -lunwind-x86_64 + $(BUILD) -lelf -llzma -lunwind-x86_64 $(OUTPUT)test-libunwind-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libunwind-debug-frame-arm.bin: - $(BUILD) -lelf -lunwind-arm + $(BUILD) -lelf -llzma -lunwind-arm $(OUTPUT)test-libunwind-debug-frame-aarch64.bin: - $(BUILD) -lelf -lunwind-aarch64 + $(BUILD) -lelf -llzma -lunwind-aarch64 $(OUTPUT)test-libaudit.bin: $(BUILD) -laudit diff --git a/tools/include/uapi/README b/tools/include/uapi/README new file mode 100644 index 0000000000000000000000000000000000000000..7147b1b2cb2850eec4a5e71de8e9addd0709a871 --- /dev/null +++ b/tools/include/uapi/README @@ -0,0 +1,73 @@ +Why we want a copy of kernel headers in tools? +============================================== + +There used to be no copies, with tools/ code using kernel headers +directly. From time to time tools/perf/ broke due to legitimate kernel +hacking. At some point Linus complained about such direct usage. Then we +adopted the current model. + +The way these headers are used in perf are not restricted to just +including them to compile something. + +There are sometimes used in scripts that convert defines into string +tables, etc, so some change may break one of these scripts, or new MSRs +may use some different #define pattern, etc. + +E.g.: + + $ ls -1 tools/perf/trace/beauty/*.sh | head -5 + tools/perf/trace/beauty/arch_errno_names.sh + tools/perf/trace/beauty/drm_ioctl.sh + tools/perf/trace/beauty/fadvise.sh + tools/perf/trace/beauty/fsconfig.sh + tools/perf/trace/beauty/fsmount.sh + $ + $ tools/perf/trace/beauty/fadvise.sh + static const char *fadvise_advices[] = { + [0] = "NORMAL", + [1] = "RANDOM", + [2] = "SEQUENTIAL", + [3] = "WILLNEED", + [4] = "DONTNEED", + [5] = "NOREUSE", + }; + $ + +The tools/perf/check-headers.sh script, part of the tools/ build +process, points out changes in the original files. + +So its important not to touch the copies in tools/ when doing changes in +the original kernel headers, that will be done later, when +check-headers.sh inform about the change to the perf tools hackers. + +Another explanation from Ingo Molnar: +It's better than all the alternatives we tried so far: + + - Symbolic links and direct #includes: this was the original approach but + was pushed back on from the kernel side, when tooling modified the + headers and broke them accidentally for kernel builds. + + - Duplicate self-defined ABI headers like glibc: double the maintenance + burden, double the chance for mistakes, plus there's no tech-driven + notification mechanism to look at new kernel side changes. + +What we are doing now is a third option: + + - A software-enforced copy-on-write mechanism of kernel headers to + tooling, driven by non-fatal warnings on the tooling side build when + kernel headers get modified: + + Warning: Kernel ABI header differences: + diff -u tools/include/uapi/drm/i915_drm.h include/uapi/drm/i915_drm.h + diff -u tools/include/uapi/linux/fs.h include/uapi/linux/fs.h + diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h + ... + + The tooling policy is to always pick up the kernel side headers as-is, + and integate them into the tooling build. The warnings above serve as a + notification to tooling maintainers that there's changes on the kernel + side. + +We've been using this for many years now, and it might seem hacky, but +works surprisingly well. + diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index a00d53d027235a4a27760cb9aac33048ab641420..5bf6148cac2b93e36f362236545e2e6eb473d5ff 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -737,7 +737,7 @@ __SC_COMP(__NR_pselect6_time64, sys_pselect6, compat_sys_pselect6_time64) #define __NR_ppoll_time64 414 __SC_COMP(__NR_ppoll_time64, sys_ppoll, compat_sys_ppoll_time64) #define __NR_io_pgetevents_time64 416 -__SYSCALL(__NR_io_pgetevents_time64, sys_io_pgetevents) +__SC_COMP(__NR_io_pgetevents_time64, sys_io_pgetevents, compat_sys_io_pgetevents_time64) #define __NR_recvmmsg_time64 417 __SC_COMP(__NR_recvmmsg_time64, sys_recvmmsg, compat_sys_recvmmsg_time64) #define __NR_mq_timedsend_time64 418 diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h index d4d86e566e0777e7173ccfc9deb30d5ef10d3cc8..535cb68fdb5c4b49b3ae4784338b66e5bb18268f 100644 --- a/tools/include/uapi/drm/i915_drm.h +++ b/tools/include/uapi/drm/i915_drm.h @@ -2163,6 +2163,15 @@ struct drm_i915_gem_context_param { * supports this per context flag. */ #define I915_CONTEXT_PARAM_LOW_LATENCY 0xe + +/* + * I915_CONTEXT_PARAM_CONTEXT_IMAGE: + * + * Allows userspace to provide own context images. + * + * Note that this is a debug API not available on production kernel builds. + */ +#define I915_CONTEXT_PARAM_CONTEXT_IMAGE 0xf /* Must be kept compact -- no holes and well documented */ /** @value: Context parameter value to be set or queried */ @@ -2564,6 +2573,24 @@ struct i915_context_param_engines { struct i915_engine_class_instance engines[N__]; \ } __attribute__((packed)) name__ +struct i915_gem_context_param_context_image { + /** @engine: Engine class & instance to be configured. */ + struct i915_engine_class_instance engine; + + /** @flags: One of the supported flags or zero. */ + __u32 flags; +#define I915_CONTEXT_IMAGE_FLAG_ENGINE_INDEX (1u << 0) + + /** @size: Size of the image blob pointed to by @image. */ + __u32 size; + + /** @mbz: Must be zero. */ + __u32 mbz; + + /** @image: Userspace memory containing the context image. */ + __u64 image; +} __attribute__((packed)); + /** * struct drm_i915_gem_context_create_ext_setparam - Context parameter * to set or query during context creation. diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index e682ab628dfa66bcbc10f9b4adc08f2a61db377b..d358add1611cd13d034b35d9a19cb3ebcc3e388c 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -81,6 +81,8 @@ enum { #define IPPROTO_ETHERNET IPPROTO_ETHERNET IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW + IPPROTO_SMC = 256, /* Shared Memory Communications */ +#define IPPROTO_SMC IPPROTO_SMC IPPROTO_MPTCP = 262, /* Multipath TCP connection */ #define IPPROTO_MPTCP IPPROTO_MPTCP IPPROTO_MAX diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index e5af8c692dc063de39a083db4c8694376de4f3f1..637efc05514534ca635671f64f84bfd65a1b8ea1 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -192,11 +192,24 @@ struct kvm_xen_exit { /* Flags that describe what fields in emulation_failure hold valid data. */ #define KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES (1ULL << 0) +/* + * struct kvm_run can be modified by userspace at any time, so KVM must be + * careful to avoid TOCTOU bugs. In order to protect KVM, HINT_UNSAFE_IN_KVM() + * renames fields in struct kvm_run from to __unsafe when + * compiled into the kernel, ensuring that any use within KVM is obvious and + * gets extra scrutiny. + */ +#ifdef __KERNEL__ +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol##__unsafe +#else +#define HINT_UNSAFE_IN_KVM(_symbol) _symbol +#endif + /* for KVM_RUN, returned by mmap(vcpu_fd, offset=0) */ struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 immediate_exit; + __u8 HINT_UNSAFE_IN_KVM(immediate_exit); __u8 padding1[6]; /* out */ @@ -918,6 +931,8 @@ struct kvm_enable_cap { #define KVM_CAP_GUEST_MEMFD 234 #define KVM_CAP_VM_TYPES 235 #define KVM_CAP_PRE_FAULT_MEMORY 236 +#define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 +#define KVM_CAP_X86_GUEST_MODE 238 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 3a64499b0f5d63734d632ab03cd1966211473d8c..4842c36fdf80199696b9e4a77d363fbcd9f3996e 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1349,12 +1349,14 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ #define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ #define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -/* 5-0x7 available */ +#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ +/* 0x7 available */ #define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ #define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB */ +#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ #define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ #define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 67626d53531664d0a65fbcf62585204e41588d88..887a2528644168a3f0f16752e6b21958a89e0bac 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt index 3766886c4bca3d967ed2e714982be5016c4816f4..83dc87c662b63ecc17553a15cc15a6b8d6f01d83 100644 --- a/tools/perf/Documentation/Build.txt +++ b/tools/perf/Documentation/Build.txt @@ -71,3 +71,31 @@ supported by GCC. UBSan detects undefined behaviors of programs at runtime. $ UBSAN_OPTIONS=print_stacktrace=1 ./perf record -a If UBSan detects any problem at runtime, it outputs a “runtime error:” message. + +4) Cross compilation +==================== +As Multiarch is commonly supported in Linux distributions, we can install +libraries for multiple architectures on the same system and then cross-compile +Linux perf. For example, Aarch64 libraries and toolchains can be installed on +an x86_64 machine, allowing us to compile perf for an Aarch64 target. + +Below is the command for building the perf with dynamic linking. + + $ cd /path/to/Linux + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +For static linking, the option `LDFLAGS="-static"` is required. + + $ make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- \ + LDFLAGS="-static" -C tools/perf + +In the embedded system world, a use case is to explicitly specify the package +configuration paths for cross building: + + $ PKG_CONFIG_SYSROOT_DIR="/path/to/cross/build/sysroot" \ + PKG_CONFIG_LIBDIR="/usr/lib/:/usr/local/lib" \ + make ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- -C tools/perf + +In this case, the variable PKG_CONFIG_SYSROOT_DIR can be used alongside the +variable PKG_CONFIG_LIBDIR or PKG_CONFIG_PATH to prepend the sysroot path to +the library paths for cross compilation. diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index c896babf7a740e2bb4fb2c50a4a9f9f6998ee315..fa679db61f6226ef838de3b1653341562960a9e3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -152,7 +152,17 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -lebl -ldl -lz -llzma -lbz2 + DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) + LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) + + # Elfutils merged libebl.a into libdw.a starting from version 0.177, + # Link libebl.a only if libdw is older than this version. + ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) + DWARFLIBS += -lebl + endif endif FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) @@ -296,6 +306,11 @@ endif ifdef PYTHON_CONFIG PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null) + # Update the python flags for cross compilation + ifdef CROSS_COMPILE + PYTHON_NATIVE := $(shell echo $(PYTHON_EMBED_LDOPTS) | sed 's/\(-L.*\/\)\(.*-linux-gnu\).*/\2/') + PYTHON_EMBED_LDOPTS := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EMBED_LDOPTS)) + endif PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null) @@ -897,6 +912,9 @@ else PYTHON_SETUPTOOLS_INSTALLED := $(shell $(PYTHON) -c 'import setuptools;' 2> /dev/null && echo "yes" || echo "no") ifeq ($(PYTHON_SETUPTOOLS_INSTALLED), yes) PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])') + ifdef CROSS_COMPILE + PYTHON_EXTENSION_SUFFIX := $(subst $(PYTHON_NATIVE),$(shell $(CC) -dumpmachine),$(PYTHON_EXTENSION_SUFFIX)) + endif LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX) else $(warning Missing python setuptools, the python binding won't be built, please install python3-setuptools or equivalent) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 175e4c7898f0ed58b11540c3a68f9559d7c4b5bb..f8148db5fc389b2dc2940029dda14eed4a9c156d 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -193,7 +193,32 @@ HOSTLD ?= ld HOSTAR ?= ar CLANG ?= clang -PKG_CONFIG = $(CROSS_COMPILE)pkg-config +# Some distros provide the command $(CROSS_COMPILE)pkg-config for +# searching packges installed with Multiarch. Use it for cross +# compilation if it is existed. +ifneq (, $(shell which $(CROSS_COMPILE)pkg-config)) + PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config +else + PKG_CONFIG ?= pkg-config + + # PKG_CONFIG_PATH or PKG_CONFIG_LIBDIR, alongside PKG_CONFIG_SYSROOT_DIR + # for modified system root, is required for the cross compilation. + # If these PKG_CONFIG environment variables are not set, Multiarch library + # paths are used instead. + ifdef CROSS_COMPILE + ifeq ($(PKG_CONFIG_LIBDIR)$(PKG_CONFIG_PATH)$(PKG_CONFIG_SYSROOT_DIR),) + CROSS_ARCH = $(shell $(CC) -dumpmachine) + PKG_CONFIG_LIBDIR := /usr/local/$(CROSS_ARCH)/lib/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/lib/$(CROSS_ARCH)/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/local/share/pkgconfig/ + PKG_CONFIG_LIBDIR := $(PKG_CONFIG_LIBDIR):/usr/share/pkgconfig/ + export PKG_CONFIG_LIBDIR + $(warning Missing PKG_CONFIG_LIBDIR, PKG_CONFIG_PATH and PKG_CONFIG_SYSROOT_DIR for cross compilation,) + $(warning set PKG_CONFIG_LIBDIR for using Multiarch libs.) + endif + endif +endif RM = rm -f LN = ln -f diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index 3656f1ca7a21c6590267499335d6a911e678e27e..ebae8415dfbbab6f6f0802913e7d0ef5de37ac70 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -230,8 +230,10 @@ 178 nospu rt_sigsuspend sys_rt_sigsuspend compat_sys_rt_sigsuspend 179 32 pread64 sys_ppc_pread64 compat_sys_ppc_pread64 179 64 pread64 sys_pread64 +179 spu pread64 sys_pread64 180 32 pwrite64 sys_ppc_pwrite64 compat_sys_ppc_pwrite64 180 64 pwrite64 sys_pwrite64 +180 spu pwrite64 sys_pwrite64 181 common chown sys_chown 182 common getcwd sys_getcwd 183 common capget sys_capget @@ -246,6 +248,7 @@ 190 common ugetrlimit sys_getrlimit compat_sys_getrlimit 191 32 readahead sys_ppc_readahead compat_sys_ppc_readahead 191 64 readahead sys_readahead +191 spu readahead sys_readahead 192 32 mmap2 sys_mmap2 compat_sys_mmap2 193 32 truncate64 sys_ppc_truncate64 compat_sys_ppc_truncate64 194 32 ftruncate64 sys_ppc_ftruncate64 compat_sys_ppc_ftruncate64 @@ -293,6 +296,7 @@ 232 nospu set_tid_address sys_set_tid_address 233 32 fadvise64 sys_ppc32_fadvise64 compat_sys_ppc32_fadvise64 233 64 fadvise64 sys_fadvise64 +233 spu fadvise64 sys_fadvise64 234 nospu exit_group sys_exit_group 235 nospu lookup_dcookie sys_ni_syscall 236 common epoll_create sys_epoll_create @@ -502,7 +506,7 @@ 412 32 utimensat_time64 sys_utimensat sys_utimensat 413 32 pselect6_time64 sys_pselect6 compat_sys_pselect6_time64 414 32 ppoll_time64 sys_ppoll compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 sys_io_pgetevents sys_io_pgetevents +416 32 io_pgetevents_time64 sys_io_pgetevents compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 sys_recvmmsg compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 sys_mq_timedsend sys_mq_timedsend 419 32 mq_timedreceive_time64 sys_mq_timedreceive sys_mq_timedreceive diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index bd0fee24ad10a3d13ef6734512e52b9048d98692..01071182763e96ff03b5d74e4b9c4387ab84c456 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -418,7 +418,7 @@ 412 32 utimensat_time64 - sys_utimensat 413 32 pselect6_time64 - compat_sys_pselect6_time64 414 32 ppoll_time64 - compat_sys_ppoll_time64 -416 32 io_pgetevents_time64 - sys_io_pgetevents +416 32 io_pgetevents_time64 - compat_sys_io_pgetevents_time64 417 32 recvmmsg_time64 - compat_sys_recvmmsg_time64 418 32 mq_timedsend_time64 - sys_mq_timedsend 419 32 mq_timedreceive_time64 - sys_mq_timedreceive diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index a396f6e6ab5bf97df25666a1048d0f285b8d465e..7093ee21c0d1c0410dbf17a77cb70a6bc6736f04 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -1,8 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note # # 64-bit system call numbers and entry vectors # # The format is: -# +# [ [noreturn]] # # The __x64_sys_*() stubs are created on-the-fly for sys_*() system calls # @@ -68,7 +69,7 @@ 57 common fork sys_fork 58 common vfork sys_vfork 59 64 execve sys_execve -60 common exit sys_exit +60 common exit sys_exit - noreturn 61 common wait4 sys_wait4 62 common kill sys_kill 63 common uname sys_newuname @@ -239,7 +240,7 @@ 228 common clock_gettime sys_clock_gettime 229 common clock_getres sys_clock_getres 230 common clock_nanosleep sys_clock_nanosleep -231 common exit_group sys_exit_group +231 common exit_group sys_exit_group - noreturn 232 common epoll_wait sys_epoll_wait 233 common epoll_ctl sys_epoll_ctl 234 common tgkill sys_tgkill @@ -343,6 +344,7 @@ 332 common statx sys_statx 333 common io_pgetevents sys_io_pgetevents 334 common rseq sys_rseq +335 common uretprobe sys_uretprobe # don't use numbers 387 through 423, add new calls after the last # 'common' entry 424 common pidfd_send_signal sys_pidfd_send_signal diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index de76bbc50bfbcbb6323bef3f67a8953c1978d03e..5c9335fff2d396b435ba877b82c2494e049858ea 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include #include @@ -688,7 +689,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* lock */ csv_sep, daemon->base, "lock"); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - daemon->start) / 60); @@ -700,7 +701,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) daemon->base, SESSION_OUTPUT); fprintf(out, " lock: %s/lock\n", daemon->base); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - daemon->start) / 60); } } @@ -727,7 +728,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) /* session ack */ csv_sep, session->base, SESSION_ACK); - fprintf(out, "%c%lu", + fprintf(out, "%c%" PRIu64, /* session up time */ csv_sep, (curr - session->start) / 60); @@ -745,7 +746,7 @@ static int cmd_session_list(struct daemon *daemon, union cmd *cmd, FILE *out) session->base, SESSION_CONTROL); fprintf(out, " ack: %s/%s\n", session->base, SESSION_ACK); - fprintf(out, " up: %lu minutes\n", + fprintf(out, " up: %" PRIu64 " minutes\n", (curr - session->start) / 60); } } diff --git a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/andes/ax45/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json index a9939823b14b5a89a2bb8dc337c6e69dc90821d6..0c9b9a2d2958a427e64f0d3b6de4ec7a366d05eb 100644 --- a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -74,7 +74,7 @@ { "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", "ConfigCode": "0x800000000000000c", - "EventName": "FW_SFENCE_VMA_RECEIVED", + "EventName": "FW_SFENCE_VMA_ASID_SENT", "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" }, { diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/starfive/dubhe-80/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json index 9b4a032186a7b1c41e342e1bf054bf8e4751c110..7149caec4f80e53cf2035c74411018a60536de54 100644 --- a/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json +++ b/tools/perf/pmu-events/arch/riscv/thead/c900-legacy/firmware.json @@ -36,7 +36,7 @@ "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" }, { - "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" }, { "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index 89d16b90370bd412c30a755bd1324f5f8c72aad1..df9cdb8bbfb88428e1d2d6a2d16c7d1abe9ae9fc 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -76,7 +76,7 @@ struct msghdr { __kernel_size_t msg_controllen; /* ancillary data buffer length */ struct kiocb *msg_iocb; /* ptr to iocb for async requests */ struct ubuf_info *msg_ubuf; - int (*sg_from_iter)(struct sock *sk, struct sk_buff *skb, + int (*sg_from_iter)(struct sk_buff *skb, struct iov_iter *from, size_t length); }; @@ -442,11 +442,14 @@ extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr, extern int __sys_socket(int family, int type, int protocol); extern struct file *__sys_socket_file(int family, int type, int protocol); extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen); +extern int __sys_bind_socket(struct socket *sock, struct sockaddr_storage *address, + int addrlen); extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr, int addrlen, int file_flags); extern int __sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen); extern int __sys_listen(int fd, int backlog); +extern int __sys_listen_socket(struct socket *sock, int backlog); extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len); extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr, diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 45e4e64fd6643ce3a83711cb295c711dd67ca511..7539717707337a8cb22396a869baba3bafa08371 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -329,12 +329,17 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO negation of O_APPEND */ #define RWF_NOAPPEND ((__force __kernel_rwf_t)0x00000020) +/* Atomic Write */ +#define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + +#define PROCFS_IOCTL_MAGIC 'f' /* Pagemap ioctl */ -#define PAGEMAP_SCAN _IOWR('f', 16, struct pm_scan_arg) +#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) /* Bitmasks provided in pm_scan_args masks and reported in page_region.categories. */ #define PAGE_IS_WPALLOWED (1 << 0) @@ -393,4 +398,158 @@ struct pm_scan_arg { __u64 return_mask; }; +/* /proc//maps ioctl */ +#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query) + +enum procmap_query_flags { + /* + * VMA permission flags. + * + * Can be used as part of procmap_query.query_flags field to look up + * only VMAs satisfying specified subset of permissions. E.g., specifying + * PROCMAP_QUERY_VMA_READABLE only will return both readable and read/write VMAs, + * while having PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_WRITABLE will only + * return read/write VMAs, though both executable/non-executable and + * private/shared will be ignored. + * + * PROCMAP_QUERY_VMA_* flags are also returned in procmap_query.vma_flags + * field to specify actual VMA permissions. + */ + PROCMAP_QUERY_VMA_READABLE = 0x01, + PROCMAP_QUERY_VMA_WRITABLE = 0x02, + PROCMAP_QUERY_VMA_EXECUTABLE = 0x04, + PROCMAP_QUERY_VMA_SHARED = 0x08, + /* + * Query modifier flags. + * + * By default VMA that covers provided address is returned, or -ENOENT + * is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA flag set, closest + * VMA with vma_start > addr will be returned if no covering VMA is + * found. + * + * PROCMAP_QUERY_FILE_BACKED_VMA instructs query to consider only VMAs that + * have file backing. Can be combined with PROCMAP_QUERY_COVERING_OR_NEXT_VMA + * to iterate all VMAs with file backing. + */ + PROCMAP_QUERY_COVERING_OR_NEXT_VMA = 0x10, + PROCMAP_QUERY_FILE_BACKED_VMA = 0x20, +}; + +/* + * Input/output argument structured passed into ioctl() call. It can be used + * to query a set of VMAs (Virtual Memory Areas) of a process. + * + * Each field can be one of three kinds, marked in a short comment to the + * right of the field: + * - "in", input argument, user has to provide this value, kernel doesn't modify it; + * - "out", output argument, kernel sets this field with VMA data; + * - "in/out", input and output argument; user provides initial value (used + * to specify maximum allowable buffer size), and kernel sets it to actual + * amount of data written (or zero, if there is no data). + * + * If matching VMA is found (according to criterias specified by + * query_addr/query_flags, all the out fields are filled out, and ioctl() + * returns 0. If there is no matching VMA, -ENOENT will be returned. + * In case of any other error, negative error code other than -ENOENT is + * returned. + * + * Most of the data is similar to the one returned as text in /proc//maps + * file, but procmap_query provides more querying flexibility. There are no + * consistency guarantees between subsequent ioctl() calls, but data returned + * for matched VMA is self-consistent. + */ +struct procmap_query { + /* Query struct size, for backwards/forward compatibility */ + __u64 size; + /* + * Query flags, a combination of enum procmap_query_flags values. + * Defines query filtering and behavior, see enum procmap_query_flags. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_flags; /* in */ + /* + * Query address. By default, VMA that covers this address will + * be looked up. PROCMAP_QUERY_* flags above modify this default + * behavior further. + * + * Input argument, provided by user. Kernel doesn't modify it. + */ + __u64 query_addr; /* in */ + /* VMA starting (inclusive) and ending (exclusive) address, if VMA is found. */ + __u64 vma_start; /* out */ + __u64 vma_end; /* out */ + /* VMA permissions flags. A combination of PROCMAP_QUERY_VMA_* flags. */ + __u64 vma_flags; /* out */ + /* VMA backing page size granularity. */ + __u64 vma_page_size; /* out */ + /* + * VMA file offset. If VMA has file backing, this specifies offset + * within the file that VMA's start address corresponds to. + * Is set to zero if VMA has no backing file. + */ + __u64 vma_offset; /* out */ + /* Backing file's inode number, or zero, if VMA has no backing file. */ + __u64 inode; /* out */ + /* Backing file's device major/minor number, or zero, if VMA has no backing file. */ + __u32 dev_major; /* out */ + __u32 dev_minor; /* out */ + /* + * If set to non-zero value, signals the request to return VMA name + * (i.e., VMA's backing file's absolute path, with " (deleted)" suffix + * appended, if file was unlinked from FS) for matched VMA. VMA name + * can also be some special name (e.g., "[heap]", "[stack]") or could + * be even user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME). + * + * Kernel will set this field to zero, if VMA has no associated name. + * Otherwise kernel will return actual amount of bytes filled in + * user-supplied buffer (see vma_name_addr field below), including the + * terminating zero. + * + * If VMA name is longer that user-supplied maximum buffer size, + * -E2BIG error is returned. + * + * If this field is set to non-zero value, vma_name_addr should point + * to valid user space memory buffer of at least vma_name_size bytes. + * If set to zero, vma_name_addr should be set to zero as well + */ + __u32 vma_name_size; /* in/out */ + /* + * If set to non-zero value, signals the request to extract and return + * VMA's backing file's build ID, if the backing file is an ELF file + * and it contains embedded build ID. + * + * Kernel will set this field to zero, if VMA has no backing file, + * backing file is not an ELF file, or ELF file has no build ID + * embedded. + * + * Build ID is a binary value (not a string). Kernel will set + * build_id_size field to exact number of bytes used for build ID. + * If build ID is requested and present, but needs more bytes than + * user-supplied maximum buffer size (see build_id_addr field below), + * -E2BIG error will be returned. + * + * If this field is set to non-zero value, build_id_addr should point + * to valid user space memory buffer of at least build_id_size bytes. + * If set to zero, build_id_addr should be set to zero as well + */ + __u32 build_id_size; /* in/out */ + /* + * User-supplied address of a buffer of at least vma_name_size bytes + * for kernel to fill with matched VMA's name (see vma_name_size field + * description above for details). + * + * Should be set to zero if VMA name should not be returned. + */ + __u64 vma_name_addr; /* in */ + /* + * User-supplied address of a buffer of at least build_id_size bytes + * for kernel to fill with matched VMA's ELF build ID, if available + * (see build_id_size field description above for details). + * + * Should be set to zero if build ID should not be returned. + */ + __u64 build_id_addr; /* in */ +}; + #endif /* _UAPI_LINUX_FS_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index ad5478dbad007341f70a8816aa506216ffea89ec..225bc366ffcbf0319929e2f55f1fbea88e4d7b81 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -154,7 +154,7 @@ struct mount_attr { */ struct statmount { __u32 size; /* Total size, including strings */ - __u32 __spare1; + __u32 mnt_opts; /* [str] Mount options of the mount */ __u64 mask; /* What results were written */ __u32 sb_dev_major; /* Device ID */ __u32 sb_dev_minor; @@ -172,7 +172,8 @@ struct statmount { __u64 propagate_from; /* Propagation from in current namespace */ __u32 mnt_root; /* [str] Root of mount relative to root of fs */ __u32 mnt_point; /* [str] Mountpoint relative to current root */ - __u64 __spare2[50]; + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u64 __spare2[49]; char str[]; /* Variable size part containing strings */ }; @@ -188,10 +189,12 @@ struct mnt_id_req { __u32 spare; __u64 mnt_id; __u64 param; + __u64 mnt_ns_id; }; /* List of all mnt_id_req versions. */ #define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ /* * @mask bits for statmount(2) @@ -202,10 +205,13 @@ struct mnt_id_req { #define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ #define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ #define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ /* * Special @mnt_id values that can be passed to listmount */ #define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ #endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 67626d53531664d0a65fbcf62585204e41588d88..887a2528644168a3f0f16752e6b21958a89e0bac 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -126,9 +126,15 @@ struct statx { __u64 stx_mnt_id; __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ - __u64 stx_subvol; /* Subvolume identifier */ /* 0xa0 */ - __u64 __spare3[11]; /* Spare space for future expansion */ + __u64 stx_subvol; /* Subvolume identifier */ + __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* 0xb0 */ + __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ + __u32 __spare1[1]; + /* 0xb8 */ + __u64 __spare3[9]; /* Spare space for future expansion */ /* 0x100 */ }; @@ -157,6 +163,7 @@ struct statx { #define STATX_DIOALIGN 0x00002000U /* Want/got direct I/O alignment info */ #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ +#define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ @@ -192,6 +199,7 @@ struct statx { #define STATX_ATTR_MOUNT_ROOT 0x00002000 /* Root of a mount */ #define STATX_ATTR_VERITY 0x00100000 /* [I] Verity protected file */ #define STATX_ATTR_DAX 0x00200000 /* File is currently in DAX state */ +#define STATX_ATTR_WRITE_ATOMIC 0x00400000 /* File supports atomic write operations */ #endif /* _UAPI_LINUX_STAT_H */ diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 628d46a0da92eb0393dd592a38e987d08dcf6db0..8bf7e8a0eb6f03a8026e07f1968568fb099e7c9f 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -142,7 +142,7 @@ struct snd_hwdep_dsp_image { * * *****************************************************************************/ -#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 17) +#define SNDRV_PCM_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 18) typedef unsigned long snd_pcm_uframes_t; typedef signed long snd_pcm_sframes_t; @@ -334,7 +334,7 @@ union snd_pcm_sync_id { unsigned char id[16]; unsigned short id16[8]; unsigned int id32[4]; -}; +} __attribute__((deprecated)); struct snd_pcm_info { unsigned int device; /* RO/WR (control): device number */ @@ -348,7 +348,7 @@ struct snd_pcm_info { int dev_subclass; /* SNDRV_PCM_SUBCLASS_* */ unsigned int subdevices_count; unsigned int subdevices_avail; - union snd_pcm_sync_id sync; /* hardware synchronization ID */ + unsigned char pad1[16]; /* was: hardware synchronization ID */ unsigned char reserved[64]; /* reserved for future... */ }; @@ -420,7 +420,8 @@ struct snd_pcm_hw_params { unsigned int rate_num; /* R: rate numerator */ unsigned int rate_den; /* R: rate denominator */ snd_pcm_uframes_t fifo_size; /* R: chip FIFO size in frames */ - unsigned char reserved[64]; /* reserved for future */ + unsigned char sync[16]; /* R: synchronization ID (perfect sync - one clock source) */ + unsigned char reserved[48]; /* reserved for future */ }; enum { diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 1730b852a947400ff54cd75d8ec60fa3565fab61..6d075648d2ccf43c464461547ba5562edb385a69 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -1141,7 +1141,7 @@ int hist_entry__append_callchain(struct hist_entry *he, struct perf_sample *samp int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *node, bool hide_unresolved) { - struct machine *machine = maps__machine(node->ms.maps); + struct machine *machine = node->ms.maps ? maps__machine(node->ms.maps) : NULL; maps__put(al->maps); al->maps = maps__get(node->ms.maps); diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 030b388800f05191e5b6492b97ebf96e9d45dcf3..3d1ca9e38b1fa9d2633b5c38ea41021a2865cc7c 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,6 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat +ldflags-y += --wrap=cxl_setup_parent_dport DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index 6f737941dc0e164b9611e9dac91cb9e55b69e715..d619672faa497a02c3ea71efd2f3f66144050805 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -299,6 +299,18 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); +void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +{ + int index; + struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); + + if (!ops || !ops->is_mock_port(dport->dport_dev)) + cxl_setup_parent_dport(host, dport); + + put_cxl_mock_ops(index); +} +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); + MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); MODULE_IMPORT_NS(CXL); diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index dd49c1d23a604cc86953139576a57bf681ef7912..81d4757ecd4c6513c6da25760a547c91eb866ba6 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -713,7 +713,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp # Make sure we are able to include and link libbpf against c++. $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) $(call msg,CXX,,$@) - $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ + $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@ # Benchmark runner $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ) diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index bd8c75b620c29d20c358ecf5c57cd9f54d9aa9a4..c397336fe1ed1c0a6b81576e0bfde87555aab4d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -216,7 +216,7 @@ static void test_uretprobe_regs_change(void) } #ifndef __NR_uretprobe -#define __NR_uretprobe 467 +#define __NR_uretprobe 335 #endif __naked unsigned long uretprobe_syscall_call_1(void) @@ -253,7 +253,7 @@ static void test_uretprobe_syscall_call(void) struct uprobe_syscall_executed *skel; int pid, status, err, go[2], c; - if (ASSERT_OK(pipe(go), "pipe")) + if (!ASSERT_OK(pipe(go), "pipe")) return; skel = uprobe_syscall_executed__open_and_load(); diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 16bdc3e255913cb9c1f5c0c9c2ef7d38ccab6d39..ef70b88bccb252bf9a64525be27e257ba6227ee7 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1432,4 +1432,58 @@ int iter_arr_with_actual_elem_count(const void *ctx) return sum; } +__u32 upper, select_n, result; +__u64 global; + +static __noinline bool nest_2(char *str) +{ + /* some insns (including branch insns) to ensure stacksafe() is triggered + * in nest_2(). This way, stacksafe() can compare frame associated with nest_1(). + */ + if (str[0] == 't') + return true; + if (str[1] == 'e') + return true; + if (str[2] == 's') + return true; + if (str[3] == 't') + return true; + return false; +} + +static __noinline bool nest_1(int n) +{ + /* case 0: allocate stack, case 1: no allocate stack */ + switch (n) { + case 0: { + char comm[16]; + + if (bpf_get_current_comm(comm, 16)) + return false; + return nest_2(comm); + } + case 1: + return nest_2((char *)&global); + default: + return false; + } +} + +SEC("raw_tp") +__success +int iter_subprog_check_stacksafe(const void *ctx) +{ + long i; + + bpf_for(i, 0, upper) { + if (!nest_1(select_n)) { + result = 1; + return 0; + } + } + + result = 2; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c index 991c473e385938030340db7ed7a998c8bff62b5d..12b4eb9d04347f71f19a2c5a696632bf330cca12 100644 --- a/tools/testing/selftests/core/close_range_test.c +++ b/tools/testing/selftests/core/close_range_test.c @@ -589,4 +589,39 @@ TEST(close_range_cloexec_unshare_syzbot) EXPECT_EQ(close(fd3), 0); } +TEST(close_range_bitmap_corruption) +{ + pid_t pid; + int status; + struct __clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + /* get the first 128 descriptors open */ + for (int i = 2; i < 128; i++) + EXPECT_GE(dup2(0, i), 0); + + /* get descriptor table shared */ + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + /* unshare and truncate descriptor table down to 64 */ + if (sys_close_range(64, ~0U, CLOSE_RANGE_UNSHARE)) + exit(EXIT_FAILURE); + + ASSERT_EQ(fcntl(64, F_GETFD), -1); + /* ... and verify that the range 64..127 is not + stuck "fully used" according to secondary bitmap */ + EXPECT_EQ(dup(0), 64) + exit(EXIT_FAILURE); + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c index 5f541522364fbd534ea0ec45cef119bdfa6d3dae..5d0a809dc2df9313f5b57aab753ced84a2c96d28 100644 --- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c +++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c @@ -29,9 +29,11 @@ static int check_vgem(int fd) version.name = name; ret = ioctl(fd, DRM_IOCTL_VERSION, &version); - if (ret) + if (ret || version.name_len != 4) return 0; + name[4] = '\0'; + return !strcmp(name, "vgem"); } diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 931dbc36ca43d6d272798f9fdf8a0111f6a23f8c..011508ca604b086e338012eb23c568c08c4d71e8 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -19,6 +19,15 @@ def _rss_key_rand(length): return [random.randint(0, 255) for _ in range(length)] +def _rss_key_check(cfg, data=None, context=0): + if data is None: + data = get_rss(cfg, context=context) + if 'rss-hash-key' not in data: + return + non_zero = [x for x in data['rss-hash-key'] if x != 0] + ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}") + + def get_rss(cfg, context=0): return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0] @@ -90,8 +99,9 @@ def _send_traffic_check(cfg, port, name, params): def test_rss_key_indir(cfg): """Test basics like updating the main RSS key and indirection table.""" - if len(_get_rx_cnts(cfg)) < 2: - KsftSkipEx("Device has only one queue (or doesn't support queue stats)") + qcnt = len(_get_rx_cnts(cfg)) + if qcnt < 3: + KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)") data = get_rss(cfg) want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table'] @@ -101,6 +111,7 @@ def test_rss_key_indir(cfg): if not data[k]: raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}") + _rss_key_check(cfg, data=data) key_len = len(data['rss-hash-key']) # Set the key @@ -110,9 +121,26 @@ def test_rss_key_indir(cfg): data = get_rss(cfg) ksft_eq(key, data['rss-hash-key']) + # Set the indirection table and the key together + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key)) + reset_indir = defer(ethtool, f"-X {cfg.ifname} default") + + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(2, max(data['rss-indirection-table'])) + + # Reset indirection table and set the key + key = _rss_key_rand(key_len) + ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key)) + data = get_rss(cfg) + _rss_key_check(cfg, data=data) + ksft_eq(0, min(data['rss-indirection-table'])) + ksft_eq(qcnt - 1, max(data['rss-indirection-table'])) + # Set the indirection table ethtool(f"-X {cfg.ifname} equal 2") - reset_indir = defer(ethtool, f"-X {cfg.ifname} default") data = get_rss(cfg) ksft_eq(0, min(data['rss-indirection-table'])) ksft_eq(1, max(data['rss-indirection-table'])) @@ -317,8 +345,11 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): ctx_cnt = i break + _rss_key_check(cfg, context=ctx_id) + if not create_with_cfg: ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + _rss_key_check(cfg, context=ctx_id) # Sanity check the context we just created data = get_rss(cfg, ctx_id) diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh index 877cd6df94a10c2bdb69ea6f20b381e01a47b68f..fe905a7f34b3c6629fc4909b093aeaa18b0d1b00 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh @@ -2,6 +2,7 @@ # SPDX-License-Identifier: GPL-2.0 lib_dir=$(dirname $0)/../../../net/forwarding +ethtool_lib_dir=$(dirname $0)/../hw ALL_TESTS=" autoneg @@ -11,7 +12,7 @@ ALL_TESTS=" NUM_NETIFS=2 : ${TIMEOUT:=30000} # ms source $lib_dir/lib.sh -source $lib_dir/ethtool_lib.sh +source $ethtool_lib_dir/ethtool_lib.sh setup_prepare() { diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c index dc0408a831d07c7296ba9c0ff1ede75ff07c3481..75b7b4ef6cfaa7e324a043fb97f7a720804aff55 100644 --- a/tools/testing/selftests/hid/hid_bpf.c +++ b/tools/testing/selftests/hid/hid_bpf.c @@ -532,6 +532,7 @@ static void load_programs(const struct test_program programs[], FIXTURE_DATA(hid_bpf) * self, const FIXTURE_VARIANT(hid_bpf) * variant) { + struct bpf_map *iter_map; int err = -EINVAL; ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links)) @@ -564,6 +565,13 @@ static void load_programs(const struct test_program programs[], *ops_hid_id = self->hid_id; } + /* we disable the auto-attach feature of all maps because we + * only want the tested one to be manually attached in the next + * call to bpf_map__attach_struct_ops() + */ + bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj) + bpf_map__set_autoattach(iter_map, false); + err = hid__load(self->skel); ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err); @@ -686,6 +694,24 @@ TEST_F(hid_bpf, subprog_raw_event) ASSERT_EQ(buf[2], 52); } +/* + * Attach hid_first_event to the given uhid device, + * attempt at re-attaching it, we should not lock and + * return an invalid struct bpf_link + */ +TEST_F(hid_bpf, multiple_attach) +{ + const struct test_program progs[] = { + { .name = "hid_first_event" }, + }; + struct bpf_link *link; + + LOAD_PROGRAMS(progs); + + link = bpf_map__attach_struct_ops(self->skel->maps.first_event); + ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops"); +} + /* * Ensures that we can attach/detach programs */ diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c index ee9bbbcf751b4871796518c6545faac6de603575..5ecc845ef79216d42e141e42167289109ea79267 100644 --- a/tools/testing/selftests/hid/progs/hid.c +++ b/tools/testing/selftests/hid/progs/hid.c @@ -455,7 +455,7 @@ struct { __type(value, struct elem); } hmap SEC(".maps"); -static int wq_cb_sleepable(void *map, int *key, struct bpf_wq *work) +static int wq_cb_sleepable(void *map, int *key, void *work) { __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10}; struct hid_bpf_ctx *hid_ctx; diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h index cfe37f4919062271bc3a85c16d9b68fdc0d4f474..e5db897586bbfe010d8799f6f52fc5c418344e6b 100644 --- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h +++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h @@ -114,7 +114,7 @@ extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx, extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym; extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym; extern int bpf_wq_set_callback_impl(struct bpf_wq *wq, - int (callback_fn)(void *map, int *key, struct bpf_wq *wq), + int (callback_fn)(void *map, int *key, void *wq), unsigned int flags__k, void *aux__ign) __ksym; #define bpf_wq_set_callback(timer, cb, flags) \ bpf_wq_set_callback_impl(timer, cb, flags, NULL) diff --git a/tools/testing/selftests/kselftest/ksft.py b/tools/testing/selftests/kselftest/ksft.py index cd89fb2bc10e7f3c10a6fbe5eacd66c21bc0779a..bf215790a89d739e0df9bdc111e808d51e385af3 100644 --- a/tools/testing/selftests/kselftest/ksft.py +++ b/tools/testing/selftests/kselftest/ksft.py @@ -70,7 +70,7 @@ def test_result(condition, description=""): def finished(): - if ksft_cnt["pass"] == ksft_num_tests: + if ksft_cnt["pass"] + ksft_cnt["skip"] == ksft_num_tests: exit_code = KSFT_PASS else: exit_code = KSFT_FAIL diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c index 709d7d72176035fedbf2131ef89babd5f16658b0..4abebde781873b80a4fb8e72f4028d98770e553c 100644 --- a/tools/testing/selftests/kvm/aarch64/get-reg-list.c +++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c @@ -32,13 +32,13 @@ static struct feature_id_reg feat_id_regs[] = { { ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 }, { ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 4, + 8, 1 } }; diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c index f92c2fb23fcd75523dda259edf82439332e34adc..8e34f7fa44e94c1dac5741824201e2dde5b93751 100644 --- a/tools/testing/selftests/kvm/riscv/get-reg-list.c +++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c @@ -961,10 +961,10 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB); KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC); KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX); KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS); -KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA), -KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB), -KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD), -KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF), +KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA); +KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB); +KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD); +KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF); KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP); KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA); KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 69849acd95b0afc06ca739fd1ee03ddc5be71e58..618cd244239004ade26294d74ad5ebcd66f17b7c 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -184,6 +184,33 @@ static void test_apic_id(void) kvm_vm_free(vm); } +static void test_x2apic_id(void) +{ + struct kvm_lapic_state lapic = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + vcpu_set_msr(vcpu, MSR_IA32_APICBASE, MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE); + + /* + * Try stuffing a modified x2APIC ID, KVM should ignore the value and + * always return the vCPU's default/readonly x2APIC ID. + */ + for (i = 0; i <= 0xff; i++) { + *(u32 *)(lapic.regs + APIC_ID) = i << 24; + *(u32 *)(lapic.regs + APIC_SPIV) = APIC_SPIV_APIC_ENABLED; + vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic); + + vcpu_ioctl(vcpu, KVM_GET_LAPIC, &lapic); + TEST_ASSERT(*((u32 *)&lapic.regs[APIC_ID]) == vcpu->id << 24, + "x2APIC ID should be fully readonly"); + } + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { struct xapic_vcpu x = { @@ -211,4 +238,5 @@ int main(int argc, char *argv[]) kvm_vm_free(vm); test_apic_id(); + test_x2apic_id(); } diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile index 901e0d07765b6f71f288d731741487b6f99907ff..cfad627e8d94de4a7ac67bf870dc91774ce7294c 100644 --- a/tools/testing/selftests/mm/Makefile +++ b/tools/testing/selftests/mm/Makefile @@ -53,7 +53,9 @@ TEST_GEN_FILES += madv_populate TEST_GEN_FILES += map_fixed_noreplace TEST_GEN_FILES += map_hugetlb TEST_GEN_FILES += map_populate +ifneq (,$(filter $(ARCH),arm64 riscv riscv64 x86 x86_64)) TEST_GEN_FILES += memfd_secret +endif TEST_GEN_FILES += migration TEST_GEN_FILES += mkdirty TEST_GEN_FILES += mlock-random-test @@ -110,7 +112,7 @@ endif endif -ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64)) +ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64 s390)) TEST_GEN_FILES += va_high_addr_switch TEST_GEN_FILES += virtual_address_range TEST_GEN_FILES += write_to_hugetlbfs diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index e140558e6f53f8b11b051f78b2e6fb7820ac3194..2c3a0eb6b22d31d5cdd1251a85ee82c401b95c15 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -89,9 +89,10 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int fd, ret = -1; int compaction_index = 0; char nr_hugepages[20] = {0}; - char init_nr_hugepages[20] = {0}; + char init_nr_hugepages[24] = {0}; - sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages); + snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), + "%lu", initial_nr_hugepages); /* We want to test with 80% of available memory. Else, OOM killer comes in to play */ diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index 1b03bcfaefdfa2be6131220088ef4e9ce3fce6bb..5a3a9bcba6404853678a8840d9b9f7860b6179ee 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -22,8 +22,10 @@ #define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */ #define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */ +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) #define MAX(X, Y) ((X) > (Y) ? (X) : (Y)) +#endif #define SIZE_MB(m) ((size_t)m * (1024 * 1024)) #define SIZE_KB(k) ((size_t)k * 1024) diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh index 03ac4f2e1cce655a9c01c3d69200ba79a3f11269..36045edb10dea0bd6992ba6d07b74db3647fe4da 100755 --- a/tools/testing/selftests/mm/run_vmtests.sh +++ b/tools/testing/selftests/mm/run_vmtests.sh @@ -374,8 +374,11 @@ CATEGORY="hmm" run_test bash ./test_hmm.sh smoke # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests CATEGORY="madv_populate" run_test ./madv_populate +if [ -x ./memfd_secret ] +then (echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix CATEGORY="memfd_secret" run_test ./memfd_secret +fi # KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100 CATEGORY="ksm" run_test ./ksm_tests -H -s 100 diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 16d0c172eaebecc81479bca91c88493929fc4bfd..535eb2c3d7d1c8675eb38efd2a6774106c221152 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -209,7 +209,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, - const void *expected_buf, int expected_len, + const char *expected_buf, int expected_len, int buf_len, int flags) { int i, ret[2], recv_errno[2], expected_errno = 0; diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f873475c6629bd4e93842ef03f78f..90f8a244ea90159d867a8e6c1231dcf31d2ae244 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,58 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. + RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ff96bb7535ff000dc2720e6c4b3604833fceea27..718d04a4f72d028b87a44390921c9b907d11dbe1 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -500,6 +500,11 @@ check_err_fail() fi } +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + xfail_on_slow() { if [[ $KSFT_MACHINE_SLOW = yes ]]; then @@ -1113,6 +1118,39 @@ mac_get() ip -j link show dev $if_name | jq -r '.[]["address"]' } +ether_addr_to_u64() +{ + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done + + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i + + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done +} + ipv6_lladdr_get() { local if_name=$1 @@ -2229,3 +2267,22 @@ absval() echo $((v > 0 ? v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 4b364cdf3ef0cfe25388485297d46a7152ccbfaf..648868f746044200c3d2bd9324b43ff9b3e3714e 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,7 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes @@ -37,9 +39,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -47,13 +108,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) + + pkt="${pkt/00:00:de:ad:be:ef/$smac}" - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -68,10 +131,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +145,7 @@ check_rcv() check_err_fail "$should_fail" "$?" "reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -104,44 +168,78 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local skip_ptp=$1; shift + local no_unicast_flt=$1; shift + local test_name="$1"; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name + + if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi sleep 1 @@ -149,61 +247,99 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name \ - "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" + + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi tcpdump_cleanup $rcv_if_name } @@ -228,57 +364,208 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } -standalone() +macvlan_create() { - h1_create - h2_create + local lower=$1 - ip link add link $h2 name macvlan0 type macvlan mode private + ip link add link $lower name macvlan0 type macvlan mode private ip link set macvlan0 address $MACVLAN_ADDR ip link set macvlan0 up +} - run_test $h2 - +macvlan_destroy() +{ ip link del macvlan0 +} + +standalone() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_create + h2_create + macvlan_create $h2 + + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" + macvlan_destroy h2_destroy h1_destroy } -bridge() +test_bridge() { + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + h1_create - bridge_create + bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0 - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" - run_test br0 + macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + bridge_destroy + h1_destroy +} - ip link del macvlan0 +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + +test_vlan() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" + + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=false + + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. + if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy bridge_destroy - h1_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 } cleanup() diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index d0219032f77300083c0ee27462223d6ccd10e2d7..8ee4489238ca4b19bcf6608a71256ba89472148f 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -146,6 +146,7 @@ cleanup_ns() for ns in "$@"; do [ -z "${ns}" ] && continue + ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d2043ec3bf6d69d8d3aba159406ec6391b8e401f..4209b95690394b7565f330a37606bf39b6d2d228 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1115,11 +1115,11 @@ again: return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (--cfg_repeat > 0) goto again; - } return 0; } diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 108aeeb84ef10a9ecba6239929d9e52c642a356f..89e553e0e0c2ef42d261b4b9a327bbd9441d5c0e 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -436,9 +436,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -661,7 +662,7 @@ pm_nl_check_endpoint() done if [ -z "${id}" ]; then - test_fail "bad test - missing endpoint id" + fail_test "bad test - missing endpoint id" return fi @@ -1415,18 +1416,28 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local extra_msg="" local count local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + extra_msg="invert" + fi + + timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout) print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip # if the test configured a short timeout tolerate greater then expected @@ -1438,7 +1449,7 @@ chk_add_nr() fi print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1449,7 +1460,7 @@ chk_add_nr() if [ $port_nr -gt 0 ]; then print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1459,7 +1470,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1470,7 +1481,7 @@ chk_add_nr() fi print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1481,7 +1492,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$ack_nr" ]; then @@ -1492,7 +1503,7 @@ chk_add_nr() fi print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1503,7 +1514,7 @@ chk_add_nr() fi print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1513,6 +1524,8 @@ chk_add_nr() print_ok fi fi + + print_info "$extra_msg" } chk_add_tx_nr() @@ -1634,6 +1647,8 @@ chk_prio_nr() { local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 + local mpj_syn=$3 + local mpj_syn_ack=$4 local count print_check "ptx" @@ -1655,6 +1670,26 @@ chk_prio_nr() else print_ok fi + + print_check "syn backup" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn" ]; then + fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn" + else + print_ok + fi + + print_check "synack backup" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn_ack" ]; then + fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack" + else + print_ok + fi } chk_subflow_nr() @@ -1955,6 +1990,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 @@ -2612,11 +2662,24 @@ backup_tests() sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 fi # single address, backup if reset "single address, backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup + pm_nl_set_limits $ns2 1 1 + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 0 1 + fi + + # single address, switch to backup + if reset "single address, switch to backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal @@ -2625,20 +2688,20 @@ backup_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi # single address with port, backup if reset "single address with port, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100 pm_nl_set_limits $ns2 1 1 - sflags=backup speed=slow \ + sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 0 0 1 fi if reset "mpc backup" && @@ -2647,17 +2710,26 @@ backup_tests() speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc backup both sides" && continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then - pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + + # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup) + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup) + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path + speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_join_nr 2 2 2 + chk_prio_nr 1 1 1 1 fi if reset "mpc switch to backup" && @@ -2666,7 +2738,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc switch to backup both sides" && @@ -2676,7 +2748,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi } @@ -2987,6 +3059,7 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -3053,7 +3126,7 @@ fullmesh_tests() addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi @@ -3066,7 +3139,7 @@ fullmesh_tests() sflags=nobackup,nofullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi } @@ -3318,7 +3391,7 @@ userspace_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 0 - chk_prio_nr 0 0 + chk_prio_nr 0 0 0 0 fi # userspace pm type prevents rm_addr @@ -3500,10 +3573,10 @@ endpoint_tests() mptcp_lib_kill_wait $tests_pid fi - if reset "delete and re-add" && + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow test_linkfail=4 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & @@ -3520,11 +3593,94 @@ endpoint_tests() chk_subflow_nr "after delete" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 chk_subflow_nr "after re-add" 2 chk_mptcp_info subflows 1 subflows 1 + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 + chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_rm_nr 1 1 + fi + + # remove and re-add + if reset "delete re-add signal" && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns1 10.0.2.1 id 1 flags signal + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + + pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 3 3 3 + chk_add_nr 4 4 + chk_rm_nr 2 1 invert + fi + + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + test_linkfail=4 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + mptcp_lib_kill_wait $tests_pid + + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert fi } diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index 47945b2b3f925835a0c27a25f8d2090e8791414e..d13fb5ea3e89425a43c879a7e051835ae5bf7443 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -7,6 +7,7 @@ MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) TEST_PROGS := br_netfilter.sh bridge_brouter.sh +TEST_PROGS += br_netfilter_queue.sh TEST_PROGS += conntrack_icmp_related.sh TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh new file mode 100755 index 0000000000000000000000000000000000000000..6a764d70ab06f984d0a07faa4791fe4741b9001b --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -0,0 +1,78 @@ +#!/bin/bash + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +cleanup() { + cleanup_all_ns +} + +setup_ns c1 c2 c3 sender + +trap cleanup EXIT + +nf_queue_wait() +{ + grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue" +} + +port_add() { + ns="$1" + dev="$2" + a="$3" + + ip link add name "$dev" type veth peer name "$dev" netns "$ns" + + ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev" + ip -net "$ns" link set "$dev" up + + ip link set "$dev" master br0 + ip link set "$dev" up +} + +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +ip link add br0 type bridge +ip addr add 192.168.1.254/24 dev br0 + +port_add "$c1" "c1" 1 +port_add "$c2" "c2" 2 +port_add "$c3" "c3" 3 +port_add "$sender" "sender" 253 + +ip link set br0 up + +modprobe -q br_netfilter + +sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1 + +ip netns exec "$sender" ping -I sender -c1 192.168.1.1 || exit 1 +ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2 +ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3 + +nft -f /dev/stdin < /dev/null & + +busywait 5000 nf_queue_wait + +for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done & +ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 11a1ebda564fd5d70c424bd20ec4c412fafbbc08..d5ffd8c9172e1d9e36dd180fa2de561637bab5ad 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -7,8 +7,6 @@ source net_helper.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -46,17 +44,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +73,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +94,17 @@ run_one_nat() { # ... so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +113,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -196,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3e74cfa1a2bfec6bc8bea1a704e7bc074ad794a5..3f2fca02fec53f52beaddb43ea4255bca24b79b8 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -67,6 +67,7 @@ struct testcase { int gso_len; /* mss after applying gso */ int r_num_mss; /* recv(): number of calls of full mss */ int r_len_last; /* recv(): size of last non-mss dgram, if any */ + bool v6_ext_hdr; /* send() dgrams with IPv6 extension headers */ }; const struct in6_addr addr6 = { @@ -77,6 +78,8 @@ const struct in_addr addr4 = { __constant_htonl(0x0a000001), /* 10.0.0.1 */ }; +static const char ipv6_hopopts_pad1[8] = { 0 }; + struct testcase testcases_v4[] = { { /* no GSO: send a single byte */ @@ -255,6 +258,13 @@ struct testcase testcases_v6[] = { .gso_len = 1, .r_num_mss = 2, }, + { + /* send 2 1B segments with extension headers */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + .v6_ext_hdr = true, + }, { /* send 2B + 2B + 1B segments */ .tlen = 5, @@ -396,11 +406,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, int i, ret, val, mss; bool sent; - fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n", addr->sa_family == AF_INET ? 4 : 6, test->tlen, test->gso_len, + test->v6_ext_hdr ? "ext-hdr " : "", test->tfail ? "(fail)" : ""); + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, + ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1))) + error(1, errno, "setsockopt ipv6 hopopts"); + } + val = test->gso_len; if (cfg_do_setsockopt) { if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) @@ -412,6 +429,12 @@ static void run_one(struct testcase *test, int fdt, int fdr, error(1, 0, "send succeeded while expecting failure"); if (!sent && !test->tfail) error(1, 0, "send failed while expecting success"); + + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0)) + error(1, errno, "setsockopt ipv6 hopopts clear"); + } + if (!sent) return; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index e3f97f90d8db0f6fb8864d9641da44f9138c8b6b..8c3a73461475bae52436e311d4f0b723c059b664 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -60,7 +60,9 @@ #define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) #endif +#ifndef MIN #define MIN(X, Y) ((X) < (Y) ? (X) : (Y)) +#endif #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index ee349187636fc127f73421cb1ea2b92af99265d1..4f255cec0c22e7e263c233d9698885ae89223f0b 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -143,7 +143,6 @@ class PluginMgr: except Exception as ee: print('exception {} in call to pre_case for {} plugin'. format(ee, pgn_inst.__class__)) - print('test_ordinal is {}'.format(test_ordinal)) print('testid is {}'.format(caseinfo['id'])) raise diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index f594a44df840e41a3154bff1f67aa6a816e0d57a..2f756628613dd82d86a22e1e17752c7e3f36c941 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -651,8 +651,10 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) return NULL; tool->data = osnoise_alloc_top(nr_cpus); - if (!tool->data) - goto out_err; + if (!tool->data) { + osnoise_destroy_tool(tool); + return NULL; + } tool->params = params; @@ -660,11 +662,6 @@ struct osnoise_tool *osnoise_init_top(struct osnoise_top_params *params) osnoise_top_handler, NULL); return tool; - -out_err: - osnoise_free_top(tool->data); - osnoise_destroy_tool(tool); - return NULL; } static int stop_tracing; diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index b14e14cdbfb9fd800b45c9d76d3a7a5ea3a44be4..fd6a3010afa833e077623065b80bdbb5b1012250 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -113,10 +113,10 @@ config KVM_GENERIC_PRIVATE_MEM select KVM_PRIVATE_MEM bool -config HAVE_KVM_GMEM_PREPARE +config HAVE_KVM_ARCH_GMEM_PREPARE bool depends on KVM_PRIVATE_MEM -config HAVE_KVM_GMEM_INVALIDATE +config HAVE_KVM_ARCH_GMEM_INVALIDATE bool depends on KVM_PRIVATE_MEM diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 229570059a1bb5fc44be96499b17090d13150d4e..992f9beb3e7d0403d7f3a4d780025a5660fdbaa5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -97,18 +97,19 @@ irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) mutex_lock(&kvm->irqfds.resampler_lock); list_del_rcu(&irqfd->resampler_link); - synchronize_srcu(&kvm->irq_srcu); if (list_empty(&resampler->list)) { list_del_rcu(&resampler->link); kvm_unregister_irq_ack_notifier(kvm, &resampler->notifier); /* - * synchronize_srcu(&kvm->irq_srcu) already called + * synchronize_srcu_expedited(&kvm->irq_srcu) already called * in kvm_unregister_irq_ack_notifier(). */ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, resampler->notifier.gsi, 0, false); kfree(resampler); + } else { + synchronize_srcu_expedited(&kvm->irq_srcu); } mutex_unlock(&kvm->irqfds.resampler_lock); @@ -126,7 +127,7 @@ irqfd_shutdown(struct work_struct *work) u64 cnt; /* Make sure irqfd has been initialized in assign path. */ - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); /* * Synchronize with the wait-queue and unhook ourselves to prevent @@ -384,7 +385,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) } list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); mutex_unlock(&kvm->irqfds.resampler_lock); } @@ -523,7 +524,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, mutex_lock(&kvm->irq_lock); hlist_del_init_rcu(&kian->link); mutex_unlock(&kvm->irq_lock); - synchronize_srcu(&kvm->irq_srcu); + synchronize_srcu_expedited(&kvm->irq_srcu); kvm_arch_post_irq_ack_notifier_list_update(kvm); } @@ -608,7 +609,7 @@ kvm_irqfd_release(struct kvm *kvm) /* * Take note of a change in irq routing. - * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards. + * Caller must invoke synchronize_srcu_expedited(&kvm->irq_srcu) afterwards. */ void kvm_irq_routing_update(struct kvm *kvm) { diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c index 1c509c3512614666f6cf592766ca4e5c3a8b9266..8f079a61a56db761116f0c12cc57322cc58d5a4f 100644 --- a/virt/kvm/guest_memfd.c +++ b/virt/kvm/guest_memfd.c @@ -13,84 +13,93 @@ struct kvm_gmem { struct list_head entry; }; -static int kvm_gmem_prepare_folio(struct inode *inode, pgoff_t index, struct folio *folio) +/** + * folio_file_pfn - like folio_file_page, but return a pfn. + * @folio: The folio which contains this index. + * @index: The index we want to look up. + * + * Return: The pfn for this index. + */ +static inline kvm_pfn_t folio_file_pfn(struct folio *folio, pgoff_t index) { -#ifdef CONFIG_HAVE_KVM_GMEM_PREPARE - struct list_head *gmem_list = &inode->i_mapping->i_private_list; - struct kvm_gmem *gmem; + return folio_pfn(folio) + (index & (folio_nr_pages(folio) - 1)); +} - list_for_each_entry(gmem, gmem_list, entry) { - struct kvm_memory_slot *slot; - struct kvm *kvm = gmem->kvm; - struct page *page; - kvm_pfn_t pfn; - gfn_t gfn; - int rc; - - if (!kvm_arch_gmem_prepare_needed(kvm)) - continue; - - slot = xa_load(&gmem->bindings, index); - if (!slot) - continue; - - page = folio_file_page(folio, index); - pfn = page_to_pfn(page); - gfn = slot->base_gfn + index - slot->gmem.pgoff; - rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, compound_order(compound_head(page))); - if (rc) { - pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", - index, gfn, pfn, rc); - return rc; - } +static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + pgoff_t index, struct folio *folio) +{ +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_PREPARE + kvm_pfn_t pfn = folio_file_pfn(folio, index); + gfn_t gfn = slot->base_gfn + index - slot->gmem.pgoff; + int rc = kvm_arch_gmem_prepare(kvm, gfn, pfn, folio_order(folio)); + if (rc) { + pr_warn_ratelimited("gmem: Failed to prepare folio for index %lx GFN %llx PFN %llx error %d.\n", + index, gfn, pfn, rc); + return rc; } - #endif + return 0; } -static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index, bool prepare) +static inline void kvm_gmem_mark_prepared(struct folio *folio) { - struct folio *folio; + folio_mark_uptodate(folio); +} - /* TODO: Support huge pages. */ - folio = filemap_grab_folio(inode->i_mapping, index); - if (IS_ERR(folio)) - return folio; +/* + * Process @folio, which contains @gfn, so that the guest can use it. + * The folio must be locked and the gfn must be contained in @slot. + * On successful return the guest sees a zero page so as to avoid + * leaking host data and the up-to-date flag is set. + */ +static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot, + gfn_t gfn, struct folio *folio) +{ + unsigned long nr_pages, i; + pgoff_t index; + int r; + + nr_pages = folio_nr_pages(folio); + for (i = 0; i < nr_pages; i++) + clear_highpage(folio_page(folio, i)); /* - * Use the up-to-date flag to track whether or not the memory has been - * zeroed before being handed off to the guest. There is no backing - * storage for the memory, so the folio will remain up-to-date until - * it's removed. + * Preparing huge folios should always be safe, since it should + * be possible to split them later if needed. * - * TODO: Skip clearing pages when trusted firmware will do it when - * assigning memory to the guest. + * Right now the folio order is always going to be zero, but the + * code is ready for huge folios. The only assumption is that + * the base pgoff of memslots is naturally aligned with the + * requested page order, ensuring that huge folios can also use + * huge page table entries for GPA->HPA mapping. + * + * The order will be passed when creating the guest_memfd, and + * checked when creating memslots. */ - if (!folio_test_uptodate(folio)) { - unsigned long nr_pages = folio_nr_pages(folio); - unsigned long i; - - for (i = 0; i < nr_pages; i++) - clear_highpage(folio_page(folio, i)); - - folio_mark_uptodate(folio); - } + WARN_ON(!IS_ALIGNED(slot->gmem.pgoff, 1 << folio_order(folio))); + index = gfn - slot->base_gfn + slot->gmem.pgoff; + index = ALIGN_DOWN(index, 1 << folio_order(folio)); + r = __kvm_gmem_prepare_folio(kvm, slot, index, folio); + if (!r) + kvm_gmem_mark_prepared(folio); - if (prepare) { - int r = kvm_gmem_prepare_folio(inode, index, folio); - if (r < 0) { - folio_unlock(folio); - folio_put(folio); - return ERR_PTR(r); - } - } + return r; +} - /* - * Ignore accessed, referenced, and dirty flags. The memory is - * unevictable and there is no storage to write back to. - */ - return folio; +/* + * Returns a locked folio on success. The caller is responsible for + * setting the up-to-date flag before the memory is mapped into the guest. + * There is no backing storage for the memory, so the folio will remain + * up-to-date until it's removed. + * + * Ignore accessed, referenced, and dirty flags. The memory is + * unevictable and there is no storage to write back to. + */ +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t index) +{ + /* TODO: Support huge pages. */ + return filemap_grab_folio(inode->i_mapping, index); } static void kvm_gmem_invalidate_begin(struct kvm_gmem *gmem, pgoff_t start, @@ -190,7 +199,7 @@ static long kvm_gmem_allocate(struct inode *inode, loff_t offset, loff_t len) break; } - folio = kvm_gmem_get_folio(inode, index, true); + folio = kvm_gmem_get_folio(inode, index); if (IS_ERR(folio)) { r = PTR_ERR(folio); break; @@ -343,7 +352,7 @@ static int kvm_gmem_error_folio(struct address_space *mapping, struct folio *fol return MF_DELAYED; } -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE static void kvm_gmem_free_folio(struct folio *folio) { struct page *page = folio_page(folio, 0); @@ -358,7 +367,7 @@ static const struct address_space_operations kvm_gmem_aops = { .dirty_folio = noop_dirty_folio, .migrate_folio = kvm_gmem_migrate_folio, .error_remove_folio = kvm_gmem_error_folio, -#ifdef CONFIG_HAVE_KVM_GMEM_INVALIDATE +#ifdef CONFIG_HAVE_KVM_ARCH_GMEM_INVALIDATE .free_folio = kvm_gmem_free_folio, #endif }; @@ -541,64 +550,76 @@ void kvm_gmem_unbind(struct kvm_memory_slot *slot) fput(file); } -static int __kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, - gfn_t gfn, kvm_pfn_t *pfn, int *max_order, bool prepare) +/* Returns a locked folio on success. */ +static struct folio * +__kvm_gmem_get_pfn(struct file *file, struct kvm_memory_slot *slot, + gfn_t gfn, kvm_pfn_t *pfn, bool *is_prepared, + int *max_order) { pgoff_t index = gfn - slot->base_gfn + slot->gmem.pgoff; struct kvm_gmem *gmem = file->private_data; struct folio *folio; - struct page *page; - int r; if (file != slot->gmem.file) { WARN_ON_ONCE(slot->gmem.file); - return -EFAULT; + return ERR_PTR(-EFAULT); } gmem = file->private_data; if (xa_load(&gmem->bindings, index) != slot) { WARN_ON_ONCE(xa_load(&gmem->bindings, index)); - return -EIO; + return ERR_PTR(-EIO); } - folio = kvm_gmem_get_folio(file_inode(file), index, prepare); + folio = kvm_gmem_get_folio(file_inode(file), index); if (IS_ERR(folio)) - return PTR_ERR(folio); + return folio; if (folio_test_hwpoison(folio)) { folio_unlock(folio); folio_put(folio); - return -EHWPOISON; + return ERR_PTR(-EHWPOISON); } - page = folio_file_page(folio, index); - - *pfn = page_to_pfn(page); + *pfn = folio_file_pfn(folio, index); if (max_order) *max_order = 0; - r = 0; - - folio_unlock(folio); - - return r; + *is_prepared = folio_test_uptodate(folio); + return folio; } int kvm_gmem_get_pfn(struct kvm *kvm, struct kvm_memory_slot *slot, gfn_t gfn, kvm_pfn_t *pfn, int *max_order) { struct file *file = kvm_gmem_get_file(slot); - int r; + struct folio *folio; + bool is_prepared = false; + int r = 0; if (!file) return -EFAULT; - r = __kvm_gmem_get_pfn(file, slot, gfn, pfn, max_order, true); + folio = __kvm_gmem_get_pfn(file, slot, gfn, pfn, &is_prepared, max_order); + if (IS_ERR(folio)) { + r = PTR_ERR(folio); + goto out; + } + + if (!is_prepared) + r = kvm_gmem_prepare_folio(kvm, slot, gfn, folio); + + folio_unlock(folio); + if (r < 0) + folio_put(folio); + +out: fput(file); return r; } EXPORT_SYMBOL_GPL(kvm_gmem_get_pfn); +#ifdef CONFIG_KVM_GENERIC_PRIVATE_MEM long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages, kvm_gmem_populate_cb post_populate, void *opaque) { @@ -625,7 +646,9 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long npages = min_t(ulong, slot->npages - (start_gfn - slot->base_gfn), npages); for (i = 0; i < npages; i += (1 << max_order)) { + struct folio *folio; gfn_t gfn = start_gfn + i; + bool is_prepared = false; kvm_pfn_t pfn; if (signal_pending(current)) { @@ -633,18 +656,39 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long break; } - ret = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &max_order, false); - if (ret) + folio = __kvm_gmem_get_pfn(file, slot, gfn, &pfn, &is_prepared, &max_order); + if (IS_ERR(folio)) { + ret = PTR_ERR(folio); break; + } - if (!IS_ALIGNED(gfn, (1 << max_order)) || - (npages - i) < (1 << max_order)) - max_order = 0; + if (is_prepared) { + folio_unlock(folio); + folio_put(folio); + ret = -EEXIST; + break; + } + + folio_unlock(folio); + WARN_ON(!IS_ALIGNED(gfn, 1 << max_order) || + (npages - i) < (1 << max_order)); + + ret = -EINVAL; + while (!kvm_range_has_memory_attributes(kvm, gfn, gfn + (1 << max_order), + KVM_MEMORY_ATTRIBUTE_PRIVATE, + KVM_MEMORY_ATTRIBUTE_PRIVATE)) { + if (!max_order) + goto put_folio_and_exit; + max_order--; + } p = src ? src + i * PAGE_SIZE : NULL; ret = post_populate(kvm, gfn, pfn, p, max_order, opaque); + if (!ret) + kvm_gmem_mark_prepared(folio); - put_page(pfn_to_page(pfn)); +put_folio_and_exit: + folio_put(folio); if (ret) break; } @@ -655,3 +699,4 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long return ret && !i ? ret : i; } EXPORT_SYMBOL_GPL(kvm_gmem_populate); +#endif diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d0788d0a72cc090c8d4eef33c9b05a9d0a923a43..cb2b78e92910fb1d6ebed0b73cf359e866f0a06b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1578,15 +1578,14 @@ static int check_memory_region_flags(struct kvm *kvm, if (mem->flags & KVM_MEM_GUEST_MEMFD) valid_flags &= ~KVM_MEM_LOG_DIRTY_PAGES; -#ifdef CONFIG_HAVE_KVM_READONLY_MEM /* * GUEST_MEMFD is incompatible with read-only memslots, as writes to * read-only memslots have emulated MMIO, not page fault, semantics, * and KVM doesn't allow emulated MMIO for private memory. */ - if (!(mem->flags & KVM_MEM_GUEST_MEMFD)) + if (kvm_arch_has_readonly_mem(kvm) && + !(mem->flags & KVM_MEM_GUEST_MEMFD)) valid_flags |= KVM_MEM_READONLY; -#endif if (mem->flags & ~valid_flags) return -EINVAL; @@ -2398,48 +2397,47 @@ static int kvm_vm_ioctl_clear_dirty_log(struct kvm *kvm, #endif /* CONFIG_KVM_GENERIC_DIRTYLOG_READ_PROTECT */ #ifdef CONFIG_KVM_GENERIC_MEMORY_ATTRIBUTES +static u64 kvm_supported_mem_attributes(struct kvm *kvm) +{ + if (!kvm || kvm_arch_has_private_mem(kvm)) + return KVM_MEMORY_ATTRIBUTE_PRIVATE; + + return 0; +} + /* * Returns true if _all_ gfns in the range [@start, @end) have attributes - * matching @attrs. + * such that the bits in @mask match @attrs. */ bool kvm_range_has_memory_attributes(struct kvm *kvm, gfn_t start, gfn_t end, - unsigned long attrs) + unsigned long mask, unsigned long attrs) { XA_STATE(xas, &kvm->mem_attr_array, start); unsigned long index; - bool has_attrs; void *entry; - rcu_read_lock(); + mask &= kvm_supported_mem_attributes(kvm); + if (attrs & ~mask) + return false; - if (!attrs) { - has_attrs = !xas_find(&xas, end - 1); - goto out; - } + if (end == start + 1) + return (kvm_get_memory_attributes(kvm, start) & mask) == attrs; + + guard(rcu)(); + if (!attrs) + return !xas_find(&xas, end - 1); - has_attrs = true; for (index = start; index < end; index++) { do { entry = xas_next(&xas); } while (xas_retry(&xas, entry)); - if (xas.xa_index != index || xa_to_value(entry) != attrs) { - has_attrs = false; - break; - } + if (xas.xa_index != index || + (xa_to_value(entry) & mask) != attrs) + return false; } -out: - rcu_read_unlock(); - return has_attrs; -} - -static u64 kvm_supported_mem_attributes(struct kvm *kvm) -{ - if (!kvm || kvm_arch_has_private_mem(kvm)) - return KVM_MEMORY_ATTRIBUTE_PRIVATE; - - return 0; + return true; } static __always_inline void kvm_handle_gfn_range(struct kvm *kvm, @@ -2534,7 +2532,7 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end, mutex_lock(&kvm->slots_lock); /* Nothing to do if the entire range as the desired attributes. */ - if (kvm_range_has_memory_attributes(kvm, start, end, attributes)) + if (kvm_range_has_memory_attributes(kvm, start, end, ~0, attributes)) goto out_unlock; /*