diff --git a/.clang-format b/.clang-format index bcf60344f9f5b07b072d50c9bbd52f173f38ef69..b62836419ea3e2d19de323071132fa26135c2f10 100644 --- a/.clang-format +++ b/.clang-format @@ -441,8 +441,11 @@ ForEachMacros: - 'inet_lhash2_for_each_icsk' - 'inet_lhash2_for_each_icsk_continue' - 'inet_lhash2_for_each_icsk_rcu' + - 'interval_tree_for_each_double_span' + - 'interval_tree_for_each_span' - 'intlist__for_each_entry' - 'intlist__for_each_entry_safe' + - 'iopt_for_each_contig_area' - 'kcore_copy__for_each_phdr' - 'key_for_each' - 'key_for_each_safe' diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs index 483639fb727b203e405b125a21611aaca2cfd487..9e3756625a8194d92761ee3e286c84aa273e5fa5 100644 --- a/Documentation/ABI/testing/sysfs-fs-f2fs +++ b/Documentation/ABI/testing/sysfs-fs-f2fs @@ -99,6 +99,12 @@ Description: Controls the issue rate of discard commands that consist of small checkpoint is triggered, and issued during the checkpoint. By default, it is disabled with 0. +What: /sys/fs/f2fs//max_ordered_discard +Date: October 2022 +Contact: "Yangtao Li" +Description: Controls the maximum ordered discard, the unit size is one block(4KB). + Set it to 16 by default. + What: /sys/fs/f2fs//max_discard_request Date: December 2021 Contact: "Konstantin Vyshetsky" @@ -132,7 +138,8 @@ Contact: "Chao Yu" Description: Controls discard granularity of inner discard thread. Inner thread will not issue discards with size that is smaller than granularity. The unit size is one block(4KB), now only support configuring - in range of [1, 512]. Default value is 4(=16KB). + in range of [1, 512]. Default value is 16. + For small devices, default value is 1. What: /sys/fs/f2fs//umount_discard_timeout Date: January 2019 @@ -235,7 +242,7 @@ Description: Shows total written kbytes issued to disk. What: /sys/fs/f2fs//features Date: July 2017 Contact: "Jaegeuk Kim" -Description: /feature_list/ +Description: /feature_list/> Shows all enabled features in current device. Supported features: encryption, blkzoned, extra_attr, projquota, inode_checksum, @@ -592,10 +599,10 @@ Description: With "mode=fragment:block" mount options, we can scatter block allo in the length of 1.. by turns. This value can be set between 1..512 and the default value is 4. -What: /sys/fs/f2fs//gc_urgent_high_remaining -Date: December 2021 -Contact: "Daeho Jeong" -Description: You can set the trial count limit for GC urgent high mode with this value. +What: /sys/fs/f2fs//gc_remaining_trials +Date: October 2022 +Contact: "Yangtao Li" +Description: You can set the trial count limit for GC urgent and idle mode with this value. If GC thread gets to the limit, the mode will turn back to GC normal mode. By default, the value is zero, which means there is no limit like before. @@ -634,3 +641,31 @@ Date: July 2022 Contact: "Daeho Jeong" Description: Show the accumulated total revoked atomic write block count after boot. If you write "0" here, you can initialize to "0". + +What: /sys/fs/f2fs//gc_mode +Date: October 2022 +Contact: "Yangtao Li" +Description: Show the current gc_mode as a string. + This is a read-only entry. + +What: /sys/fs/f2fs//discard_urgent_util +Date: November 2022 +Contact: "Yangtao Li" +Description: When space utilization exceeds this, do background DISCARD aggressively. + Does DISCARD forcibly in a period of given min_discard_issue_time when the number + of discards is not 0 and set discard granularity to 1. + Default: 80 + +What: /sys/fs/f2fs//hot_data_age_threshold +Date: November 2022 +Contact: "Ping Xiong" +Description: When DATA SEPARATION is on, it controls the age threshold to indicate + the data blocks as hot. By default it was initialized as 262144 blocks + (equals to 1GB). + +What: /sys/fs/f2fs//warm_data_age_threshold +Date: November 2022 +Contact: "Ping Xiong" +Description: When DATA SEPARATION is on, it controls the age threshold to indicate + the data blocks as warm. By default it was initialized as 2621440 blocks + (equals to 10GB). diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count new file mode 100644 index 0000000000000000000000000000000000000000..156cca9dbc960628c07458c9ec52d686cc551385 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-oops_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Oopsed since last boot. diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count new file mode 100644 index 0000000000000000000000000000000000000000..08f083d2fd51bf59bd8a6b8dd4abe77af3be6a35 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-kernel-warn_count @@ -0,0 +1,6 @@ +What: /sys/kernel/oops_count +Date: November 2022 +KernelVersion: 6.2.0 +Contact: Linux Kernel Hardening List +Description: + Shows how many times the system has Warned since last boot. diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst index 3766bf8a1c20ed9bdc0d99770a90e312fd470062..ed3b8dc854ecf215b770b25b68209761997eb2c7 100644 --- a/Documentation/admin-guide/cifs/usage.rst +++ b/Documentation/admin-guide/cifs/usage.rst @@ -858,7 +858,7 @@ CIFS kernel module parameters These module parameters can be specified or modified either during the time of module loading or during the runtime by using the interface:: - /proc/module/cifs/parameters/ + /sys/module/cifs/parameters/ i.e.:: diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index 6726f439958ca0bad0d0fe26a3fe8034ccbdbaef..86fd884928700bd02b8673375e60ddde5f408521 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -595,3 +595,32 @@ X2TLB ----- Indicates whether the crashed kernel enabled SH extended mode. + +RISCV64 +======= + +VA_BITS +------- + +The maximum number of bits for virtual addresses. Used to compute the +virtual memory ranges. + +PAGE_OFFSET +----------- + +Indicates the virtual kernel start address of the direct-mapped RAM region. + +phys_ram_base +------------- + +Indicates the start physical RAM address. + +MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END|KERNEL_LINK_ADDR +---------------------------------------------------------------------------------------------- + +Used to get the correct ranges: + + * MODULES_VADDR ~ MODULES_END : Kernel module space. + * VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space. + * VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array. + * KERNEL_LINK_ADDR : start address of Kernel link and BPF diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 39e7a2d46e1e4c2152eabc6002010e3d68e7acf3..659d7bc47d7b2acdb6c47fa8cd234a3836a2446b 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -670,6 +670,15 @@ This is the default behavior. an oops event is detected. +oops_limit +========== + +Number of kernel oopses after which the kernel should panic when +``panic_on_oops`` is not set. Setting this to 0 disables checking +the count. Setting this to 1 has the same effect as setting +``panic_on_oops=1``. The default value is 10000. + + osrelease, ostype & version =========================== @@ -1526,6 +1535,16 @@ entry will default to 2 instead of 0. 2 Unprivileged calls to ``bpf()`` are disabled = ============================================================= + +warn_limit +========== + +Number of kernel warnings after which the kernel should panic when +``panic_on_warn`` is not set. Setting this to 0 disables checking +the warning count. Setting this to 1 has the same effect as setting +``panic_on_warn=1``. The default value is 0. + + watchdog ======== diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst index c9412eb855523c8cb1c47976734f1df948bbb952..62f961610773d68560b35217a9cb75879ce7b158 100644 --- a/Documentation/core-api/kernel-api.rst +++ b/Documentation/core-api/kernel-api.rst @@ -36,6 +36,9 @@ String Conversions String Manipulation ------------------- +.. kernel-doc:: include/linux/fortify-string.h + :internal: + .. kernel-doc:: lib/string.c :export: diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst index f225a953ab4b9fb4891833872a1748d4c58d80a2..3506899ef83e394cd24463dbfcd170496c44ab2d 100644 --- a/Documentation/crypto/devel-algos.rst +++ b/Documentation/crypto/devel-algos.rst @@ -172,7 +172,7 @@ Here are schematics of how these functions are called when operated from other part of the kernel. Note that the .setkey() call might happen before or after any of these schematics happen, but must not happen during any of these are in-flight. Please note that calling .init() -followed immediately by .finish() is also a perfectly valid +followed immediately by .final() is also a perfectly valid transformation. :: diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst index b45dabbf69d685d937817498e8a422f933cca86f..f80f243e227e623e5ee89a5615745cdcee7a66f5 100644 --- a/Documentation/crypto/userspace-if.rst +++ b/Documentation/crypto/userspace-if.rst @@ -131,9 +131,9 @@ from the kernel crypto API. If the buffer is too small for the message digest, the flag MSG_TRUNC is set by the kernel. In order to set a message digest key, the calling application must use -the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC -operation is performed without the initial HMAC state change caused by -the key. +the setsockopt() option of ALG_SET_KEY or ALG_SET_KEY_BY_KEY_SERIAL. If the +key is not set the HMAC operation is performed without the initial HMAC state +change caused by the key. Symmetric Cipher API -------------------- @@ -382,6 +382,15 @@ mentioned optname: - the RNG cipher type to provide the seed +- ALG_SET_KEY_BY_KEY_SERIAL -- Setting the key via keyring key_serial_t. + This operation behaves the same as ALG_SET_KEY. The decrypted + data is copied from a keyring key, and uses that data as the + key for symmetric encryption. + + The passed in key_serial_t must have the KEY_(POS|USR|GRP|OTH)_SEARCH + permission set, otherwise -EPERM is returned. Supports key types: user, + logon, encrypted, and trusted. + - ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size for AEAD ciphers. For a encryption operation, the authentication tag of the given size will be generated. For a decryption operation, the diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1a9da8bff7a7506c97b26fb902e47efb8f487cf --- /dev/null +++ b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml @@ -0,0 +1,127 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/crypto/rockchip,rk3288-crypto.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Rockchip Electronics Security Accelerator + +maintainers: + - Heiko Stuebner + +properties: + compatible: + enum: + - rockchip,rk3288-crypto + - rockchip,rk3328-crypto + - rockchip,rk3399-crypto + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + minItems: 3 + maxItems: 4 + + clock-names: + minItems: 3 + maxItems: 4 + + resets: + minItems: 1 + maxItems: 3 + + reset-names: + minItems: 1 + maxItems: 3 + +allOf: + - if: + properties: + compatible: + contains: + const: rockchip,rk3288-crypto + then: + properties: + clocks: + minItems: 4 + clock-names: + items: + - const: aclk + - const: hclk + - const: sclk + - const: apb_pclk + resets: + maxItems: 1 + reset-names: + items: + - const: crypto-rst + - if: + properties: + compatible: + contains: + const: rockchip,rk3328-crypto + then: + properties: + clocks: + maxItems: 3 + clock-names: + items: + - const: hclk_master + - const: hclk_slave + - const: sclk + resets: + maxItems: 1 + reset-names: + items: + - const: crypto-rst + - if: + properties: + compatible: + contains: + const: rockchip,rk3399-crypto + then: + properties: + clocks: + maxItems: 3 + clock-names: + items: + - const: hclk_master + - const: hclk_slave + - const: sclk + resets: + minItems: 3 + reset-names: + items: + - const: master + - const: slave + - const: crypto-rst + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - resets + - reset-names + +additionalProperties: false + +examples: + - | + #include + #include + crypto@ff8a0000 { + compatible = "rockchip,rk3288-crypto"; + reg = <0xff8a0000 0x4000>; + interrupts = ; + clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>, + <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>; + clock-names = "aclk", "hclk", "sclk", "apb_pclk"; + resets = <&cru SRST_CRYPTO>; + reset-names = "crypto-rst"; + }; diff --git a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt b/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt deleted file mode 100644 index 5e2ba385b8c9b9a6118af61c36c2a4e0b8463287..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt +++ /dev/null @@ -1,28 +0,0 @@ -Rockchip Electronics And Security Accelerator - -Required properties: -- compatible: Should be "rockchip,rk3288-crypto" -- reg: Base physical address of the engine and length of memory mapped - region -- interrupts: Interrupt number -- clocks: Reference to the clocks about crypto -- clock-names: "aclk" used to clock data - "hclk" used to clock data - "sclk" used to clock crypto accelerator - "apb_pclk" used to clock dma -- resets: Must contain an entry for each entry in reset-names. - See ../reset/reset.txt for details. -- reset-names: Must include the name "crypto-rst". - -Examples: - - crypto: cypto-controller@ff8a0000 { - compatible = "rockchip,rk3288-crypto"; - reg = <0xff8a0000 0x4000>; - interrupts = ; - clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>, - <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>; - clock-names = "aclk", "hclk", "sclk", "apb_pclk"; - resets = <&cru SRST_CRYPTO>; - reset-names = "crypto-rst"; - }; diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml index ed23bf94a8e00a5e11ca763ee35a779e29698928..6759c5bf3e572260af2d5b4053c5201d5481bee8 100644 --- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml +++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml @@ -6,12 +6,18 @@ $schema: http://devicetree.org/meta-schemas/core.yaml# title: STMicroelectronics STM32 CRYP bindings +description: The STM32 CRYP block is built on the CRYP block found in + the STn8820 SoC introduced in 2007, and subsequently used in the U8500 + SoC in 2010. + maintainers: - Lionel Debieve properties: compatible: enum: + - st,stn8820-cryp + - stericsson,ux500-cryp - st,stm32f756-cryp - st,stm32mp1-cryp @@ -27,6 +33,19 @@ properties: resets: maxItems: 1 + dmas: + items: + - description: mem2cryp DMA channel + - description: cryp2mem DMA channel + + dma-names: + items: + - const: mem2cryp + - const: cryp2mem + + power-domains: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml b/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml index f32e09ef937c023dcf19b13a73950bbbb9591e5b..10e56cf306dbab5829fa9941d9b257c5bcc18a78 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml @@ -35,7 +35,7 @@ properties: gpio-line-names: description: strings describing the names of each gpio line. minItems: 1 - maxItems: 100 + maxItems: 144 "#gpio-cells": const: 2 diff --git a/Documentation/devicetree/bindings/gpio/gpio-latch.yaml b/Documentation/devicetree/bindings/gpio/gpio-latch.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1ed82a2cebdaa446fc2466cd6362399845ce6ce9 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio-latch.yaml @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/gpio-latch.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: GPIO latch controller + +maintainers: + - Sascha Hauer + +description: | + This binding describes a GPIO multiplexer based on latches connected to + other GPIOs, like this: + + CLK0 ----------------------. ,--------. + CLK1 -------------------. `--------|> #0 | + | | | + OUT0 ----------------+--|-----------|D0 Q0|-----|< + OUT1 --------------+-|--|-----------|D1 Q1|-----|< + OUT2 ------------+-|-|--|-----------|D2 Q2|-----|< + OUT3 ----------+-|-|-|--|-----------|D3 Q3|-----|< + OUT4 --------+-|-|-|-|--|-----------|D4 Q4|-----|< + OUT5 ------+-|-|-|-|-|--|-----------|D5 Q5|-----|< + OUT6 ----+-|-|-|-|-|-|--|-----------|D6 Q6|-----|< + OUT7 --+-|-|-|-|-|-|-|--|-----------|D7 Q7|-----|< + | | | | | | | | | `--------' + | | | | | | | | | + | | | | | | | | | ,--------. + | | | | | | | | `-----------|> #1 | + | | | | | | | | | | + | | | | | | | `--------------|D0 Q0|-----|< + | | | | | | `----------------|D1 Q1|-----|< + | | | | | `------------------|D2 Q2|-----|< + | | | | `--------------------|D3 Q3|-----|< + | | | `----------------------|D4 Q4|-----|< + | | `------------------------|D5 Q5|-----|< + | `--------------------------|D6 Q6|-----|< + `----------------------------|D7 Q7|-----|< + `--------' + + The number of clk-gpios and latched-gpios is not fixed. The actual number + of number of latches and the number of inputs per latch is derived from + the number of GPIOs given in the corresponding device tree properties. + +properties: + compatible: + const: gpio-latch + "#gpio-cells": + const: 2 + + clk-gpios: + description: Array of GPIOs to be used to clock a latch + + latched-gpios: + description: Array of GPIOs to be used as inputs per latch + + setup-duration-ns: + description: Delay in nanoseconds to wait after the latch inputs have been + set up + + clock-duration-ns: + description: Delay in nanoseconds to wait between clock output changes + + gpio-controller: true + + gpio-line-names: true + +required: + - compatible + - "#gpio-cells" + - gpio-controller + - clk-gpios + - latched-gpios + +additionalProperties: false + +examples: + - | + gpio-latch { + #gpio-cells = <2>; + pinctrl-names = "default"; + pinctrl-0 = <&pinctrl_di_do_leds>; + compatible = "gpio-latch"; + gpio-controller; + setup-duration-ns = <100>; + clock-duration-ns = <100>; + + clk-gpios = <&gpio3 7 0>, <&gpio3 8 0>; + latched-gpios = <&gpio3 21 0>, <&gpio3 22 0>, + <&gpio3 23 0>, <&gpio3 24 0>, + <&gpio3 25 0>, <&gpio3 26 0>, + <&gpio3 27 0>, <&gpio3 28 0>; + }; diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml b/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml index 1acaa0a3d35afe43a7dd294663ea698ccc8a5ae8..48bf414aa50ea1922a5f3b194807a80249c26cdc 100644 --- a/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml +++ b/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml @@ -12,6 +12,7 @@ maintainers: properties: compatible: enum: + - dlg,slg7xl45106 - nxp,pca9570 - nxp,pca9571 diff --git a/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml b/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml new file mode 100644 index 0000000000000000000000000000000000000000..735d97d645a09f460ade8f6cb272a959d3327ae5 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml @@ -0,0 +1,56 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/gpio/hisilicon,ascend910-gpio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: HiSilicon common GPIO controller + +maintainers: + - Jay Fang + +description: + The HiSilicon common GPIO controller can be used for many different + types of SoC such as Huawei Ascend AI series chips. + +properties: + compatible: + const: hisilicon,ascend910-gpio + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + gpio-controller: true + + "#gpio-cells": + const: 2 + + ngpios: + minimum: 1 + maximum: 32 + +required: + - compatible + - reg + - interrupts + - gpio-controller + - "#gpio-cells" + - ngpios + +additionalProperties: false + +examples: + - | + #include + + gpio@840d0000 { + compatible = "hisilicon,ascend910-gpio"; + reg = <0x840d0000 0x1000>; + ngpios = <32>; + gpio-controller; + #gpio-cells = <2>; + interrupts = ; + }; diff --git a/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml b/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7d7a8de7bcd89027154b247ae1a2d0620ae2d37a --- /dev/null +++ b/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml @@ -0,0 +1,73 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/i2c/hisilicon,ascend910-i2c.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: HiSilicon common I2C controller + +maintainers: + - Yicong Yang + +description: + The HiSilicon common I2C controller can be used for many different + types of SoC such as Huawei Ascend AI series chips. + +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + +properties: + compatible: + const: hisilicon,ascend910-i2c + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 1 + + clock-frequency: + default: 400000 + + i2c-sda-falling-time-ns: + default: 343 + + i2c-scl-falling-time-ns: + default: 203 + + i2c-sda-hold-time-ns: + default: 830 + + i2c-scl-rising-time-ns: + default: 365 + + i2c-digital-filter-width-ns: + default: 0 + +required: + - compatible + - reg + - interrupts + +unevaluatedProperties: false + +examples: + - | + #include + + i2c@38b0000 { + compatible = "hisilicon,ascend910-i2c"; + reg = <0x38b0000 0x10000>; + interrupts = ; + i2c-sda-falling-time-ns = <56>; + i2c-scl-falling-time-ns = <56>; + i2c-sda-hold-time-ns = <56>; + i2c-scl-rising-time-ns = <56>; + i2c-digital-filter; + i2c-digital-filter-width-ns = <0x0>; + clocks = <&alg_clk>; + clock-frequency = <400000>; + }; diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml index 4e730fb7be5679127937f512d4b3005dfcbd7316..421563bf576cd5ca064a617f6d29c9c8508b1f45 100644 --- a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml +++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml @@ -23,6 +23,7 @@ properties: - const: mediatek,mt6577-i2c - const: mediatek,mt6589-i2c - const: mediatek,mt7622-i2c + - const: mediatek,mt7986-i2c - const: mediatek,mt8168-i2c - const: mediatek,mt8173-i2c - const: mediatek,mt8183-i2c diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml index 93c164aa00dafc43b558c223bd06461b08c34cc9..984fc1ed3ec6ae2ef84a66331280908dc2a29034 100644 --- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml +++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml @@ -19,6 +19,7 @@ properties: - const: allwinner,sun6i-a31-i2c - items: - enum: + - allwinner,suniv-f1c100s-i2c - allwinner,sun8i-a23-i2c - allwinner,sun8i-a83t-i2c - allwinner,sun8i-v536-i2c diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml b/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml index 0e7ed00562e21bcaa9409bc365957ec90ed60c9e..f5f7dc8f325cb28b8829ff76acdf13fa1264afd8 100644 --- a/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml +++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml @@ -10,18 +10,19 @@ maintainers: - Andy Gross - Bjorn Andersson -allOf: - - $ref: /schemas/i2c/i2c-controller.yaml# - properties: compatible: - const: qcom,geni-i2c + enum: + - qcom,geni-i2c + - qcom,geni-i2c-master-hub clocks: - maxItems: 1 + minItems: 1 + maxItems: 2 clock-names: - const: se + minItems: 1 + maxItems: 2 clock-frequency: default: 100000 @@ -35,13 +36,12 @@ properties: - const: rx interconnects: + minItems: 2 maxItems: 3 interconnect-names: - items: - - const: qup-core - - const: qup-config - - const: qup-memory + minItems: 2 + maxItems: 3 interrupts: maxItems: 1 @@ -71,6 +71,50 @@ required: - clock-names - reg +allOf: + - $ref: /schemas/i2c/i2c-controller.yaml# + - if: + properties: + compatible: + contains: + const: qcom,geni-i2c-master-hub + then: + properties: + clocks: + minItems: 2 + + clock-names: + items: + - const: se + - const: core + + dmas: false + dma-names: false + + interconnects: + maxItems: 2 + + interconnect-names: + items: + - const: qup-core + - const: qup-config + else: + properties: + clocks: + maxItems: 1 + + clock-names: + const: se + + interconnects: + minItems: 3 + + interconnect-names: + items: + - const: qup-core + - const: qup-config + - const: qup-memory + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/i2c/renesas,riic.yaml b/Documentation/devicetree/bindings/i2c/renesas,riic.yaml index d3c0d5c427acbcbf39fd6ad07fb525980dac3bd8..2291a7cd619be4eedef4be803663fadc60f946a8 100644 --- a/Documentation/devicetree/bindings/i2c/renesas,riic.yaml +++ b/Documentation/devicetree/bindings/i2c/renesas,riic.yaml @@ -19,7 +19,7 @@ properties: - enum: - renesas,riic-r7s72100 # RZ/A1H - renesas,riic-r7s9210 # RZ/A2M - - renesas,riic-r9a07g043 # RZ/G2UL + - renesas,riic-r9a07g043 # RZ/G2UL and RZ/Five - renesas,riic-r9a07g044 # RZ/G2{L,LC} - renesas,riic-r9a07g054 # RZ/V2L - const: renesas,riic-rz # RZ/A or RZ/G2L diff --git a/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml b/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8eaa07ae9774350fac77fa8e8e939f5abe31b5bb --- /dev/null +++ b/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml @@ -0,0 +1,168 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/baikal,bt1-pcie.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Baikal-T1 PCIe Root Port Controller + +maintainers: + - Serge Semin + +description: + Embedded into Baikal-T1 SoC Root Complex controller with a single port + activated. It's based on the DWC RC PCIe v4.60a IP-core, which is configured + to have just a single Root Port function and is capable of establishing the + link up to Gen.3 speed on x4 lanes. It doesn't have embedded clock and reset + control module, so the proper interface initialization is supposed to be + performed by software. There four in- and four outbound iATU regions + which can be used to emit all required TLP types on the PCIe bus. + +allOf: + - $ref: /schemas/pci/snps,dw-pcie.yaml# + +properties: + compatible: + const: baikal,bt1-pcie + + reg: + description: + DBI, DBI2 and at least 4KB outbound iATU-capable region for the + peripheral devices CFG-space access. + maxItems: 3 + + reg-names: + items: + - const: dbi + - const: dbi2 + - const: config + + interrupts: + description: + MSI, AER, PME, Hot-plug, Link Bandwidth Management, Link Equalization + request and eight Read/Write eDMA IRQ lines are available. + maxItems: 14 + + interrupt-names: + items: + - const: dma0 + - const: dma1 + - const: dma2 + - const: dma3 + - const: dma4 + - const: dma5 + - const: dma6 + - const: dma7 + - const: msi + - const: aer + - const: pme + - const: hp + - const: bw_mg + - const: l_eq + + clocks: + description: + DBI (attached to the APB bus), AXI-bus master and slave interfaces + are fed up by the dedicated application clocks. A common reference + clock signal is supposed to be attached to the corresponding Ref-pad + of the SoC. It will be redistributed amongst the controller core + sub-modules (pipe, core, aux, etc). + maxItems: 4 + + clock-names: + items: + - const: dbi + - const: mstr + - const: slv + - const: ref + + resets: + description: + A comprehensive controller reset logic is supposed to be implemented + by software, so almost all the possible application and core reset + signals are exposed via the system CCU module. + maxItems: 9 + + reset-names: + items: + - const: mstr + - const: slv + - const: pwr + - const: hot + - const: phy + - const: core + - const: pipe + - const: sticky + - const: non-sticky + + baikal,bt1-syscon: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Phandle to the Baikal-T1 System Controller DT node. It's required to + access some additional PM, Reset-related and LTSSM signals. + + num-lanes: + maximum: 4 + + max-link-speed: + maximum: 3 + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + +unevaluatedProperties: false + +examples: + - | + #include + #include + + pcie@1f052000 { + compatible = "baikal,bt1-pcie"; + device_type = "pci"; + reg = <0x1f052000 0x1000>, <0x1f053000 0x1000>, <0x1bdbf000 0x1000>; + reg-names = "dbi", "dbi2", "config"; + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x81000000 0 0x00000000 0x1bdb0000 0 0x00008000>, + <0x82000000 0 0x20000000 0x08000000 0 0x13db0000>; + bus-range = <0x0 0xff>; + + interrupts = , + , + , + , + , + , + , + , + , + , + , + , + , + ; + interrupt-names = "dma0", "dma1", "dma2", "dma3", + "dma4", "dma5", "dma6", "dma7", + "msi", "aer", "pme", "hp", "bw_mg", + "l_eq"; + + clocks = <&ccu_sys 1>, <&ccu_axi 6>, <&ccu_axi 7>, <&clk_pcie>; + clock-names = "dbi", "mstr", "slv", "ref"; + + resets = <&ccu_axi 6>, <&ccu_axi 7>, <&ccu_sys 7>, <&ccu_sys 10>, + <&ccu_sys 4>, <&ccu_sys 6>, <&ccu_sys 5>, <&ccu_sys 8>, + <&ccu_sys 9>; + reset-names = "mstr", "slv", "pwr", "hot", "phy", "core", "pipe", + "sticky", "non-sticky"; + + reset-gpios = <&port0 0 GPIO_ACTIVE_LOW>; + + num-lanes = <4>; + max-link-speed = <3>; + }; +... diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml index 376e739bcad40a89083067f18d7837a5b64af43e..49b4f7a32e71e4eba5267f8559914f4e36f22790 100644 --- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml @@ -14,9 +14,6 @@ description: |+ This PCIe host controller is based on the Synopsys DesignWare PCIe IP and thus inherits all the common properties defined in snps,dw-pcie.yaml. -allOf: - - $ref: /schemas/pci/snps,dw-pcie.yaml# - properties: compatible: enum: @@ -61,7 +58,7 @@ properties: - const: pcie - const: pcie_bus - const: pcie_phy - - const: pcie_inbound_axi for imx6sx-pcie, pcie_aux for imx8mq-pcie + - enum: [ pcie_inbound_axi, pcie_aux ] num-lanes: const: 1 @@ -175,6 +172,47 @@ required: - clocks - clock-names +allOf: + - $ref: /schemas/pci/snps,dw-pcie.yaml# + - if: + properties: + compatible: + contains: + const: fsl,imx6sx-pcie + then: + properties: + clock-names: + items: + - {} + - {} + - {} + - const: pcie_inbound_axi + - if: + properties: + compatible: + contains: + const: fsl,imx8mq-pcie + then: + properties: + clock-names: + items: + - {} + - {} + - {} + - const: pcie_aux + - if: + properties: + compatible: + not: + contains: + enum: + - fsl,imx6sx-pcie + - fsl,imx8mq-pcie + then: + properties: + clock-names: + maxItems: 3 + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml index c00be39af64e53e801a5d4d551235b5e16f504fe..7e8c7a2a5f9b6c387b1ec2fd7b5a47dcee5867e1 100644 --- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml +++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml @@ -43,14 +43,12 @@ description: |+ each set has its own address for MSI message, and supports 32 MSI vectors to generate interrupt. -allOf: - - $ref: /schemas/pci/pci-bus.yaml# - properties: compatible: oneOf: - items: - enum: + - mediatek,mt7986-pcie - mediatek,mt8188-pcie - mediatek,mt8195-pcie - const: mediatek,mt8192-pcie @@ -70,29 +68,29 @@ properties: minItems: 1 maxItems: 8 + iommu-map: + maxItems: 1 + + iommu-map-mask: + const: 0 + resets: minItems: 1 maxItems: 2 reset-names: minItems: 1 + maxItems: 2 items: - - const: phy - - const: mac + enum: [ phy, mac ] clocks: + minItems: 4 maxItems: 6 clock-names: - items: - - const: pl_250m - - const: tl_26m - - const: tl_96m - - const: tl_32k - - const: peri_26m - - enum: - - top_133m # for MT8192 - - peri_mem # for MT8188/MT8195 + minItems: 4 + maxItems: 6 assigned-clocks: maxItems: 1 @@ -107,6 +105,9 @@ properties: items: - const: pcie-phy + power-domains: + maxItems: 1 + '#interrupt-cells': const: 1 @@ -138,6 +139,54 @@ required: - '#interrupt-cells' - interrupt-controller +allOf: + - $ref: /schemas/pci/pci-bus.yaml# + - if: + properties: + compatible: + const: mediatek,mt8192-pcie + then: + properties: + clock-names: + items: + - const: pl_250m + - const: tl_26m + - const: tl_96m + - const: tl_32k + - const: peri_26m + - const: top_133m + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt8188-pcie + - mediatek,mt8195-pcie + then: + properties: + clock-names: + items: + - const: pl_250m + - const: tl_26m + - const: tl_96m + - const: tl_32k + - const: peri_26m + - const: peri_mem + - if: + properties: + compatible: + contains: + enum: + - mediatek,mt7986-pcie + then: + properties: + clock-names: + items: + - const: pl_250m + - const: tl_26m + - const: peri_26m + - const: top_133m + unevaluatedProperties: false examples: diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml index 54f07852d279e7bcdf28660935c6baada64cd4ed..a5859bb3dc28cd6661c1a014ff7272e8b51f83e5 100644 --- a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml +++ b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml @@ -62,6 +62,16 @@ properties: minItems: 3 maxItems: 13 + dma-coherent: true + + interconnects: + maxItems: 2 + + interconnect-names: + items: + - const: pcie-mem + - const: cpu-pcie + resets: minItems: 1 maxItems: 12 @@ -631,6 +641,18 @@ allOf: items: - const: pci # PCIe core reset + - if: + properties: + compatible: + contains: + enum: + - qcom,pcie-sa8540p + - qcom,pcie-sc8280xp + then: + required: + - interconnects + - interconnect-names + - if: not: properties: diff --git a/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml b/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml index bc0a9d1db750bebbdfcbc7c6bf2c3b26f300fa0d..2be72ae1169f99423a67ee81d22ec89224515338 100644 --- a/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml @@ -14,10 +14,10 @@ maintainers: description: |+ RK3568 SoC PCIe host controller is based on the Synopsys DesignWare PCIe IP and thus inherits all the common properties defined in - designware-pcie.txt. + snps,dw-pcie.yaml. allOf: - - $ref: /schemas/pci/pci-bus.yaml# + - $ref: /schemas/pci/snps,dw-pcie.yaml# properties: compatible: diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d87e13496834a48098768a56b4c90a742a70b486 --- /dev/null +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml @@ -0,0 +1,266 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/pci/snps,dw-pcie-common.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Synopsys DWC PCIe RP/EP controller + +maintainers: + - Jingoo Han + - Gustavo Pimentel + +description: + Generic Synopsys DesignWare PCIe Root Port and Endpoint controller + properties. + +select: false + +properties: + reg: + description: + DWC PCIe CSR space is normally accessed over the dedicated Data Bus + Interface - DBI. In accordance with the reference manual the register + configuration space belongs to the Configuration-Dependent Module (CDM) + and is split up into several sub-parts Standard PCIe configuration + space, Port Logic Registers (PL), Shadow Config-space Registers, + iATU/eDMA registers. The particular sub-space is selected by the + CDM/ELBI (dbi_cs) and CS2 (dbi_cs2) signals (selector bits). Such + configuration provides a flexible interface for the system engineers to + either map the particular space at a desired MMIO address or just leave + them in a contiguous memory space if pure Native or AXI Bridge DBI access + is selected. Note the PCIe CFG-space, PL and Shadow registers are + specific for each activated function, while the rest of the sub-spaces + are common for all of them (if there are more than one). + minItems: 2 + maxItems: 6 + + reg-names: + minItems: 2 + maxItems: 6 + + interrupts: + description: + There are two main sub-blocks which are normally capable of + generating interrupts. It's System Information Interface and MSI + interface. While the former one has some common for the Host and + Endpoint controllers IRQ-signals, the later interface is obviously + Root Complex specific since it's responsible for the incoming MSI + messages signalling. The System Information IRQ signals are mainly + responsible for reporting the generic PCIe hierarchy and Root + Complex events like VPD IO request, general AER, PME, Hot-plug, link + bandwidth change, link equalization request, INTx asserted/deasserted + Message detection, embedded DMA Tx/Rx/Error. + minItems: 1 + maxItems: 26 + + interrupt-names: + minItems: 1 + maxItems: 26 + + clocks: + description: + DWC PCIe reference manual explicitly defines a set of the clocks required + to get the controller working correctly. In general all of them can + be divided into two groups':' application and core clocks. Note the + platforms may have some of the clock sources unspecified in case if the + corresponding domains are fed up from a common clock source. + minItems: 1 + maxItems: 7 + + clock-names: + minItems: 1 + maxItems: 7 + items: + oneOf: + - description: + Data Bus Interface (DBI) clock. Clock signal for the AXI-bus + interface of the Configuration-Dependent Module, which is + basically the set of the controller CSRs. + const: dbi + - description: + Application AXI-bus Master interface clock. Basically this is + a clock for the controller DMA interface (PCI-to-CPU). + const: mstr + - description: + Application AXI-bus Slave interface clock. This is a clock for + the CPU-to-PCI memory IO interface. + const: slv + - description: + Controller Core-PCS PIPE interface clock. It's normally + supplied by an external PCS-PHY. + const: pipe + - description: + Controller Primary clock. It's assumed that all controller input + signals (except resets) are synchronous to this clock. + const: core + - description: + Auxiliary clock for the controller PMC domain. The controller + partitioning implies having some parts to operate with this + clock in some power management states. + const: aux + - description: + Generic reference clock. In case if there are several + interfaces fed up with a common clock source it's advisable to + define it with this name (for instance pipe, core and aux can + be connected to a single source of the periodic signal). + const: ref + - description: + Clock for the PHY registers interface. Originally this is + a PHY-viewport-based interface, but some platform may have + specifically designed one. + const: phy_reg + - description: + Vendor-specific clock names. Consider using the generic names + above for new bindings. + oneOf: + - description: See native 'dbi' clock for details + enum: [ pcie, pcie_apb_sys, aclk_dbi ] + - description: See native 'mstr/slv' clock for details + enum: [ pcie_bus, pcie_inbound_axi, pcie_aclk, aclk_mst, aclk_slv ] + - description: See native 'pipe' clock for details + enum: [ pcie_phy, pcie_phy_ref, link ] + - description: See native 'aux' clock for details + enum: [ pcie_aux ] + - description: See native 'ref' clock for details. + enum: [ gio ] + - description: See nativs 'phy_reg' clock for details + enum: [ pcie_apb_phy, pclk ] + + resets: + description: + DWC PCIe reference manual explicitly defines a set of the reset + signals required to be de-asserted to properly activate the controller + sub-parts. All of these signals can be divided into two sub-groups':' + application and core resets with respect to the main sub-domains they + are supposed to reset. Note the platforms may have some of these signals + unspecified in case if they are automatically handled or aggregated into + a comprehensive control module. + minItems: 1 + maxItems: 10 + + reset-names: + minItems: 1 + maxItems: 10 + items: + oneOf: + - description: Data Bus Interface (DBI) domain reset + const: dbi + - description: AXI-bus Master interface reset + const: mstr + - description: AXI-bus Slave interface reset + const: slv + - description: Application-dependent interface reset + const: app + - description: Controller Non-sticky CSR flags reset + const: non-sticky + - description: Controller sticky CSR flags reset + const: sticky + - description: PIPE-interface (Core-PCS) logic reset + const: pipe + - description: + Controller primary reset (resets everything except PMC module) + const: core + - description: PCS/PHY block reset + const: phy + - description: PMC hot reset signal + const: hot + - description: Cold reset signal + const: pwr + - description: + Vendor-specific reset names. Consider using the generic names + above for new bindings. + oneOf: + - description: See native 'app' reset for details + enum: [ apps, gio, apb ] + - description: See native 'phy' reset for details + enum: [ pciephy, link ] + - description: See native 'pwr' reset for details + enum: [ turnoff ] + + phys: + description: + There can be up to the number of possible lanes PHYs specified placed in + the phandle array in the line-based order. Obviously each the specified + PHYs are supposed to be able to work in the PCIe mode with a speed + implied by the DWC PCIe controller they are attached to. + minItems: 1 + maxItems: 16 + + phy-names: + minItems: 1 + maxItems: 16 + oneOf: + - description: Generic PHY names + items: + pattern: '^pcie[0-9]+$' + - description: + Vendor-specific PHY names. Consider using the generic + names above for new bindings. + items: + oneOf: + - pattern: '^pcie(-?phy[0-9]*)?$' + - pattern: '^p2u-[0-7]$' + + reset-gpio: + deprecated: true + description: + Reference to the GPIO-controlled PERST# signal. It is used to reset all + the peripheral devices available on the PCIe bus. + maxItems: 1 + + reset-gpios: + description: + Reference to the GPIO-controlled PERST# signal. It is used to reset all + the peripheral devices available on the PCIe bus. + maxItems: 1 + + max-link-speed: + maximum: 5 + + num-lanes: + description: + Number of PCIe link lanes to use. Can be omitted if the already brought + up link is supposed to be preserved. + maximum: 16 + + num-ob-windows: + $ref: /schemas/types.yaml#/definitions/uint32 + deprecated: true + description: + Number of outbound address translation windows. This parameter can be + auto-detected based on the iATU memory writability. So there is no + point in having a dedicated DT-property for it. + maximum: 256 + + num-ib-windows: + $ref: /schemas/types.yaml#/definitions/uint32 + deprecated: true + description: + Number of inbound address translation windows. In the same way as + for the outbound AT windows, this parameter can be auto-detected based + on the iATU memory writability. There is no point having a dedicated + DT-property for it either. + maximum: 256 + + num-viewport: + $ref: /schemas/types.yaml#/definitions/uint32 + deprecated: true + description: + Number of outbound view ports configured in hardware. It's the same as + the number of outbound AT windows. + maximum: 256 + + snps,enable-cdm-check: + $ref: /schemas/types.yaml#/definitions/flag + description: + Enable automatic checking of CDM (Configuration Dependent Module) + registers for data corruption. CDM registers include standard PCIe + configuration space registers, Port Logic registers, DMA and iATU + registers. This feature has been available since DWC PCIe v4.80a. + + dma-coherent: true + +additionalProperties: true + +... diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml index b78535040f04c6d7ce37058649949f818137f7f0..8fc2151691a47b13d96bd52b39af3a7fe3dfdb94 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml @@ -13,76 +13,182 @@ maintainers: description: | Synopsys DesignWare PCIe host controller endpoint +# Please create a separate DT-schema for your DWC PCIe Endpoint controller +# and make sure it's assigned with the vendor-specific compatible string. +select: + properties: + compatible: + const: snps,dw-pcie-ep + required: + - compatible + allOf: - $ref: /schemas/pci/pci-ep.yaml# + - $ref: /schemas/pci/snps,dw-pcie-common.yaml# properties: - compatible: - anyOf: - - {} - - const: snps,dw-pcie-ep - reg: - description: | - It should contain Data Bus Interface (dbi) and config registers for all - versions. - For designware core version >= 4.80, it may contain ATU address space. + description: + DBI, DBI2 reg-spaces and outbound memory window are required for the + normal controller functioning. iATU memory IO region is also required + if the space is unrolled (IP-core version >= 4.80a). minItems: 2 - maxItems: 4 + maxItems: 5 reg-names: minItems: 2 - maxItems: 4 + maxItems: 5 items: - enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl] - - reset-gpio: - description: GPIO pin number of PERST# signal - maxItems: 1 - deprecated: true - - reset-gpios: - description: GPIO controlled connection to PERST# signal - maxItems: 1 - - snps,enable-cdm-check: - type: boolean - description: | - This is a boolean property and if present enables - automatic checking of CDM (Configuration Dependent Module) registers - for data corruption. CDM registers include standard PCIe configuration - space registers, Port Logic registers, DMA and iATU (internal Address - Translation Unit) registers. - - num-ib-windows: - $ref: /schemas/types.yaml#/definitions/uint32 - maximum: 256 - description: number of inbound address translation windows - deprecated: true - - num-ob-windows: - $ref: /schemas/types.yaml#/definitions/uint32 - maximum: 256 - description: number of outbound address translation windows - deprecated: true + oneOf: + - description: + Basic DWC PCIe controller configuration-space accessible over + the DBI interface. This memory space is either activated with + CDM/ELBI = 0 and CS2 = 0 or is a contiguous memory region + with all spaces. Note iATU/eDMA CSRs are indirectly accessible + via the PL viewports on the DWC PCIe controllers older than + v4.80a. + const: dbi + - description: + Shadow DWC PCIe config-space registers. This space is selected + by setting CDM/ELBI = 0 and CS2 = 1. This is an intermix of + the PCI-SIG PCIe CFG-space with the shadow registers for some + PCI Header space, PCI Standard and Extended Structures. It's + mainly relevant for the end-point controller configuration, + but still there are some shadow registers available for the + Root Port mode too. + const: dbi2 + - description: + External Local Bus registers. It's an application-dependent + registers normally defined by the platform engineers. The space + can be selected by setting CDM/ELBI = 1 and CS2 = 0 wires or can + be accessed over some platform-specific means (for instance + as a part of a system controller). + enum: [ elbi, app ] + - description: + iATU/eDMA registers common for all device functions. It's an + unrolled memory space with the internal Address Translation + Unit and Enhanced DMA, which is selected by setting CDM/ELBI = 1 + and CS2 = 1. For IP-core releases prior v4.80a, these registers + have been programmed via an indirect addressing scheme using a + set of viewport CSRs mapped into the PL space. Note iATU is + normally mapped to the 0x0 address of this region, while eDMA + is available at 0x80000 base address. + const: atu + - description: + Platform-specific eDMA registers. Some platforms may have eDMA + CSRs mapped in a non-standard base address. The registers offset + can be changed or the MS/LS-bits of the address can be attached + in an additional RTL block before the MEM-IO transactions reach + the DW PCIe slave interface. + const: dma + - description: + PHY/PCS configuration registers. Some platforms can have the + PCS and PHY CSRs accessible over a dedicated memory mapped + region, but mainly these registers are indirectly accessible + either by means of the embedded PHY viewport schema or by some + platform-specific method. + const: phy + - description: + Outbound iATU-capable memory-region which will be used to + generate various application-specific traffic on the PCIe bus + hierarchy. It's usage scenario depends on the endpoint + functionality, for instance it can be used to create MSI(X) + messages. + const: addr_space + - description: + Vendor-specific CSR names. Consider using the generic names above + for new bindings. + oneOf: + - description: See native 'elbi/app' CSR region for details. + enum: [ link, appl ] + - description: See native 'atu' CSR region for details. + enum: [ atu_dma ] + allOf: + - contains: + const: dbi + - contains: + const: addr_space + + interrupts: + description: + There is no mandatory IRQ signals for the normal controller functioning, + but in addition to the native set the platforms may have a link- or + PM-related IRQs specified. + minItems: 1 + maxItems: 20 + + interrupt-names: + minItems: 1 + maxItems: 20 + items: + oneOf: + - description: + Controller request to read or write virtual product data + from/to the VPD capability registers. + const: vpd + - description: + Link Equalization Request flag is set in the Link Status 2 + register (applicable if the corresponding IRQ is enabled in + the Link Control 3 register). + const: l_eq + - description: + Indicates that the eDMA Tx/Rx transfer is complete or that an + error has occurred on the corresponding channel. eDMA can have + eight Tx (Write) and Rx (Read) eDMA channels thus supporting up + to 16 IRQ signals all together. Write eDMA channels shall go + first in the ordered row as per default edma_int[*] bus setup. + pattern: '^dma([0-9]|1[0-5])?$' + - description: + PCIe protocol correctable error or a Data Path protection + correctable error is detected by the automotive/safety + feature. + const: sft_ce + - description: + Indicates that the internal safety mechanism has detected an + uncorrectable error. + const: sft_ue + - description: + Application-specific IRQ raised depending on the vendor-specific + events basis. + const: app + - description: + Vendor-specific IRQ names. Consider using the generic names above + for new bindings. + oneOf: + - description: See native "app" IRQ for details + enum: [ intr ] + + max-functions: + maximum: 32 required: + - compatible - reg - reg-names - - compatible additionalProperties: true examples: - | - bus { - #address-cells = <1>; - #size-cells = <1>; - pcie-ep@dfd00000 { - compatible = "snps,dw-pcie-ep"; - reg = <0xdfc00000 0x0001000>, /* IP registers 1 */ - <0xdfc01000 0x0001000>, /* IP registers 2 */ - <0xd0000000 0x2000000>; /* Configuration space */ - reg-names = "dbi", "dbi2", "addr_space"; - }; + pcie-ep@dfd00000 { + compatible = "snps,dw-pcie-ep"; + reg = <0xdfc00000 0x0001000>, /* IP registers 1 */ + <0xdfc01000 0x0001000>, /* IP registers 2 */ + <0xd0000000 0x2000000>; /* Configuration space */ + reg-names = "dbi", "dbi2", "addr_space"; + + interrupts = <23>, <24>; + interrupt-names = "dma0", "dma1"; + + clocks = <&sys_clk 12>, <&sys_clk 24>; + clock-names = "dbi", "ref"; + + resets = <&sys_rst 12>, <&sys_rst 24>; + reset-names = "dbi", "phy"; + + phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>; + phy-names = "pcie0", "pcie1", "pcie2", "pcie3"; + + max-link-speed = <3>; + max-functions = /bits/ 8 <4>; }; diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml index 7e0b015f14146ebda1e6d55f298b74a34bac6e12..1a83f0f65f191edaaa75d7e2331600b1d7066200 100644 --- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml @@ -13,20 +13,25 @@ maintainers: description: | Synopsys DesignWare PCIe host controller +# Please create a separate DT-schema for your DWC PCIe Root Port controller +# and make sure it's assigned with the vendor-specific compatible string. +select: + properties: + compatible: + const: snps,dw-pcie + required: + - compatible + allOf: - $ref: /schemas/pci/pci-bus.yaml# + - $ref: /schemas/pci/snps,dw-pcie-common.yaml# properties: - compatible: - anyOf: - - {} - - const: snps,dw-pcie - reg: - description: | - It should contain Data Bus Interface (dbi) and config registers for all - versions. - For designware core version >= 4.80, it may contain ATU address space. + description: + At least DBI reg-space and peripheral devices CFG-space outbound window + are required for the normal controller work. iATU memory IO region is + also required if the space is unrolled (IP-core version >= 4.80a). minItems: 2 maxItems: 5 @@ -34,71 +39,194 @@ properties: minItems: 2 maxItems: 5 items: - enum: [ dbi, dbi2, config, atu, atu_dma, app, appl, elbi, mgmt, ctrl, - parf, cfg, link, ulreg, smu, mpu, apb, phy, ecam ] - - num-lanes: - description: | - number of lanes to use (this property should be specified unless - the link is brought already up in firmware) - maximum: 16 - - reset-gpio: - description: GPIO pin number of PERST# signal - maxItems: 1 - deprecated: true - - reset-gpios: - description: GPIO controlled connection to PERST# signal - maxItems: 1 - - interrupts: true - - interrupt-names: true - - clocks: true - - snps,enable-cdm-check: - type: boolean - description: | - This is a boolean property and if present enables - automatic checking of CDM (Configuration Dependent Module) registers - for data corruption. CDM registers include standard PCIe configuration - space registers, Port Logic registers, DMA and iATU (internal Address - Translation Unit) registers. - - num-viewport: - $ref: /schemas/types.yaml#/definitions/uint32 - maximum: 256 - description: | - number of view ports configured in hardware. If a platform - does not specify it, the driver autodetects it. - deprecated: true + oneOf: + - description: + Basic DWC PCIe controller configuration-space accessible over + the DBI interface. This memory space is either activated with + CDM/ELBI = 0 and CS2 = 0 or is a contiguous memory region + with all spaces. Note iATU/eDMA CSRs are indirectly accessible + via the PL viewports on the DWC PCIe controllers older than + v4.80a. + const: dbi + - description: + Shadow DWC PCIe config-space registers. This space is selected + by setting CDM/ELBI = 0 and CS2 = 1. This is an intermix of + the PCI-SIG PCIe CFG-space with the shadow registers for some + PCI Header space, PCI Standard and Extended Structures. It's + mainly relevant for the end-point controller configuration, + but still there are some shadow registers available for the + Root Port mode too. + const: dbi2 + - description: + External Local Bus registers. It's an application-dependent + registers normally defined by the platform engineers. The space + can be selected by setting CDM/ELBI = 1 and CS2 = 0 wires or can + be accessed over some platform-specific means (for instance + as a part of a system controller). + enum: [ elbi, app ] + - description: + iATU/eDMA registers common for all device functions. It's an + unrolled memory space with the internal Address Translation + Unit and Enhanced DMA, which is selected by setting CDM/ELBI = 1 + and CS2 = 1. For IP-core releases prior v4.80a, these registers + have been programmed via an indirect addressing scheme using a + set of viewport CSRs mapped into the PL space. Note iATU is + normally mapped to the 0x0 address of this region, while eDMA + is available at 0x80000 base address. + const: atu + - description: + Platform-specific eDMA registers. Some platforms may have eDMA + CSRs mapped in a non-standard base address. The registers offset + can be changed or the MS/LS-bits of the address can be attached + in an additional RTL block before the MEM-IO transactions reach + the DW PCIe slave interface. + const: dma + - description: + PHY/PCS configuration registers. Some platforms can have the + PCS and PHY CSRs accessible over a dedicated memory mapped + region, but mainly these registers are indirectly accessible + either by means of the embedded PHY viewport schema or by some + platform-specific method. + const: phy + - description: + Outbound iATU-capable memory-region which will be used to access + the peripheral PCIe devices configuration space. + const: config + - description: + Vendor-specific CSR names. Consider using the generic names above + for new bindings. + oneOf: + - description: See native 'elbi/app' CSR region for details. + enum: [ apb, mgmt, link, ulreg, appl ] + - description: See native 'atu' CSR region for details. + enum: [ atu_dma ] + - description: Syscon-related CSR regions. + enum: [ smu, mpu ] + - description: Tegra234 aperture + enum: [ ecam ] + allOf: + - contains: + const: dbi + - contains: + const: config + + interrupts: + description: + DWC PCIe Root Port/Complex specific IRQ signals. At least MSI interrupt + signal is supposed to be specified for the host controller. + minItems: 1 + maxItems: 26 + + interrupt-names: + minItems: 1 + maxItems: 26 + items: + oneOf: + - description: + Controller request to read or write virtual product data + from/to the VPD capability registers. + const: vpd + - description: + Link Equalization Request flag is set in the Link Status 2 + register (applicable if the corresponding IRQ is enabled in + the Link Control 3 register). + const: l_eq + - description: + Indicates that the eDMA Tx/Rx transfer is complete or that an + error has occurred on the corresponding channel. eDMA can have + eight Tx (Write) and Rx (Read) eDMA channels thus supporting up + to 16 IRQ signals all together. Write eDMA channels shall go + first in the ordered row as per default edma_int[*] bus setup. + pattern: '^dma([0-9]|1[0-5])?$' + - description: + PCIe protocol correctable error or a Data Path protection + correctable error is detected by the automotive/safety + feature. + const: sft_ce + - description: + Indicates that the internal safety mechanism has detected an + uncorrectable error. + const: sft_ue + - description: + Application-specific IRQ raised depending on the vendor-specific + events basis. + const: app + - description: + DSP AXI MSI Interrupt detected. It gets de-asserted when there is + no more MSI interrupt pending. The interrupt is relevant to the + iMSI-RX - Integrated MSI Receiver (AXI bridge). + const: msi + - description: + Legacy A/B/C/D interrupt signal. Basically it's triggered by + receiving a Assert_INT{A,B,C,D}/Desassert_INT{A,B,C,D} message + from the downstream device. + pattern: "^int(a|b|c|d)$" + - description: + Error condition detected and a flag is set in the Root Error Status + register of the AER capability. It's asserted when the RC + internally generated an error or an error message is received by + the RC. + const: aer + - description: + PME message is received by the port. That means having the PME + status bit set in the Root Status register (the event is + supposed to be unmasked in the Root Control register). + const: pme + - description: + Hot-plug event is detected. That is a bit has been set in the + Slot Status register and the corresponding event is enabled in + the Slot Control register. + const: hp + - description: + Link Autonomous Bandwidth Status flag has been set in the Link + Status register (the event is supposed to be unmasked in the + Link Control register). + const: bw_au + - description: + Bandwidth Management Status flag has been set in the Link + Status register (the event is supposed to be unmasked in the + Link Control register). + const: bw_mg + - description: + Vendor-specific IRQ names. Consider using the generic names above + for new bindings. + oneOf: + - description: See native "app" IRQ for details + enum: [ intr ] + allOf: + - contains: + const: msi additionalProperties: true required: + - compatible - reg - reg-names - - compatible examples: - | - bus { - #address-cells = <1>; - #size-cells = <1>; - pcie@dfc00000 { - device_type = "pci"; - compatible = "snps,dw-pcie"; - reg = <0xdfc00000 0x0001000>, /* IP registers */ - <0xd0000000 0x0002000>; /* Configuration space */ - reg-names = "dbi", "config"; - #address-cells = <3>; - #size-cells = <2>; - ranges = <0x81000000 0 0x00000000 0xde000000 0 0x00010000>, - <0x82000000 0 0xd0400000 0xd0400000 0 0x0d000000>; - interrupts = <25>, <24>; - #interrupt-cells = <1>; - num-lanes = <1>; - }; + pcie@dfc00000 { + compatible = "snps,dw-pcie"; + device_type = "pci"; + reg = <0xdfc00000 0x0001000>, /* IP registers */ + <0xd0000000 0x0002000>; /* Configuration space */ + reg-names = "dbi", "config"; + #address-cells = <3>; + #size-cells = <2>; + ranges = <0x81000000 0 0x00000000 0xde000000 0 0x00010000>, + <0x82000000 0 0xd0400000 0xd0400000 0 0x0d000000>; + bus-range = <0x0 0xff>; + + interrupts = <25>, <24>; + interrupt-names = "msi", "hp"; + #interrupt-cells = <1>; + + reset-gpios = <&port0 0 1>; + + phys = <&pcie_phy>; + phy-names = "pcie"; + + num-lanes = <1>; + max-link-speed = <3>; }; diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml index aed437dac363a10131835cf1ca7cce5155c53afd..10e6eabdff534ac947b31209848944709091f04a 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml @@ -58,6 +58,13 @@ properties: dma-coherent: description: Indicates that the PCIe IP block can ensure the coherency + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: link_state + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml index 2115d5a3f0e14ba530573e9b6ceb0c2572274066..b0513b197d0871e32f621317a41283df584ef5e1 100644 --- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml +++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml @@ -73,9 +73,31 @@ properties: - const: 0xb00f - items: - const: 0xb010 + - items: + - const: 0xb013 msi-map: true + interrupts: + maxItems: 1 + + interrupt-names: + items: + - const: link_state + + interrupt-controller: + type: object + additionalProperties: false + + properties: + interrupt-controller: true + + '#interrupt-cells': + const: 1 + + interrupts: + maxItems: 1 + required: - compatible - reg diff --git a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml index 48ed227fc5b9e888781a9df39d52b90b1cf5f6b7..53da2edd7c9abef7d13c588af2fbb63a17d509eb 100644 --- a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml +++ b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml @@ -36,7 +36,7 @@ properties: - const: mpu interrupts: - maxItems: 1 + maxItems: 2 clocks: items: @@ -94,8 +94,9 @@ examples: #interrupt-cells = <1>; ranges = <0x81000000 0 0x40000000 0 0x40000000 0 0x00010000>, <0x82000000 0 0x50000000 0 0x50000000 0 0x20000000>; - interrupts = ; - interrupt-names = "intr"; + interrupts = , + ; + interrupt-names = "msi", "intr"; interrupt-map-mask = <0 0 0 7>; interrupt-map = <0 0 0 1 &gic GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH diff --git a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml index abd134c9d4009c9721a50f4bcc2c7342d8e2f842..e8e4ab1e5b95db1213de533f91e69bfbfe60237b 100644 --- a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml +++ b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml @@ -16,7 +16,9 @@ maintainers: properties: compatible: - const: nuvoton,npcm750-rng + enum: + - nuvoton,npcm750-rng + - nuvoton,npcm845-rng reg: maxItems: 1 diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml index 2bf5293fc9952e30ba9fea3bef7198eeb2cdd4a9..ab4df020528534c0349161c5aa95124adaf24ca7 100644 --- a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml +++ b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml @@ -21,20 +21,19 @@ properties: compatible: enum: - qcom,geni-se-qup + - qcom,geni-se-i2c-master-hub reg: description: QUP wrapper common register address and length. maxItems: 1 clock-names: - items: - - const: m-ahb - - const: s-ahb + minItems: 1 + maxItems: 2 clocks: - items: - - description: Master AHB Clock - - description: Slave AHB Clock + minItems: 1 + maxItems: 2 "#address-cells": const: 2 @@ -81,6 +80,39 @@ patternProperties: description: GENI Serial Engine based UART Controller. $ref: /schemas/serial/qcom,serial-geni-qcom.yaml# +allOf: + - if: + properties: + compatible: + contains: + const: qcom,geni-se-i2c-master-hub + then: + properties: + clock-names: + items: + - const: s-ahb + + clocks: + items: + - description: Slave AHB Clock + + iommus: false + + patternProperties: + "spi@[0-9a-f]+$": false + "serial@[0-9a-f]+$": false + else: + properties: + clock-names: + items: + - const: m-ahb + - const: s-ahb + + clocks: + items: + - description: Master AHB Clock + - description: Slave AHB Clock + additionalProperties: false examples: diff --git a/Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml b/Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f1fc3b0d8608544b4cae3b637451c4776a11ee10 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml @@ -0,0 +1,84 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/thermal/generic-adc-thermal.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: General Purpose Analog To Digital Converter (ADC) based thermal sensor + +maintainers: + - Laxman Dewangan + +description: + On some of platforms, thermal sensor like thermistors are connected to + one of ADC channel and sensor resistance is read via voltage across the + sensor resistor. The voltage read across the sensor is mapped to + temperature using voltage-temperature lookup table. + +properties: + compatible: + const: generic-adc-thermal + + '#thermal-sensor-cells': + const: 0 + + io-channels: + maxItems: 1 + + io-channel-names: + const: sensor-channel + + temperature-lookup-table: + description: | + Lookup table to map the relation between ADC value and temperature. + When ADC is read, the value is looked up on the table to get the + equivalent temperature. + + If not specified, driver assumes the ADC channel gives milliCelsius + directly. + $ref: /schemas/types.yaml#/definitions/int32-matrix + items: + items: + - description: Temperature in milliCelsius + - description: ADC read value + +required: + - compatible + - '#thermal-sensor-cells' + - io-channels + - io-channel-names + +additionalProperties: false + +examples: + - | + #include + + thermal-sensor { + compatible = "generic-adc-thermal"; + #thermal-sensor-cells = <0>; + io-channels = <&ads1015 1>; + io-channel-names = "sensor-channel"; + temperature-lookup-table = < + (-40000) 2578 + (-39000) 2577 + (-38000) 2576 + (-37000) 2575 + (-36000) 2574 + (-35000) 2573 + (-34000) 2572 + (-33000) 2571 + (-32000) 2569 + (-31000) 2568 + (-30000) 2567 + /* skip */ + 118000 254 + 119000 247 + 120000 240 + 121000 233 + 122000 226 + 123000 220 + 124000 214 + 125000 208>; + }; +... diff --git a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml index 89c54e08ee61b8a3c19502996573203444975e43..b90726229ac9c4af99f826d3b38ff8182e3e7095 100644 --- a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml @@ -32,6 +32,13 @@ properties: clocks: maxItems: 1 + nvmem-cells: + maxItems: 1 + description: Phandle to the calibration data provided by ocotp + + nvmem-cell-names: + const: calib + "#thermal-sensor-cells": description: | Number of cells required to uniquely identify the thermal diff --git a/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt index 5c7e7bdd029abfab9cb14c3076afa9b647d596c8..38b32bb447e39ab9e24ba1a775290b2b96725787 100644 --- a/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt +++ b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt @@ -13,6 +13,8 @@ Required properties: - "mediatek,mt2701-thermal" : For MT2701 family of SoCs - "mediatek,mt2712-thermal" : For MT2712 family of SoCs - "mediatek,mt7622-thermal" : For MT7622 SoC + - "mediatek,mt7981-thermal", "mediatek,mt7986-thermal" : For MT7981 SoC + - "mediatek,mt7986-thermal" : For MT7986 SoC - "mediatek,mt8183-thermal" : For MT8183 family of SoCs - "mediatek,mt8516-thermal", "mediatek,mt2701-thermal : For MT8516 family of SoCs - reg: Address range of the thermal controller diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml index c41fcf4041176633e012cb96378b9d86f9439c50..0231f187b09775bdd7eba1680130439bc4c745a7 100644 --- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml +++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml @@ -58,8 +58,14 @@ properties: - qcom,sm8150-tsens - qcom,sm8250-tsens - qcom,sm8350-tsens + - qcom,sm8450-tsens + - qcom,sm8550-tsens - const: qcom,tsens-v2 + - description: v2 of TSENS with combined interrupt + enum: + - qcom,ipq8074-tsens + reg: items: - description: TM registers @@ -67,15 +73,11 @@ properties: interrupts: minItems: 1 - items: - - description: Combined interrupt if upper or lower threshold crossed - - description: Interrupt if critical threshold crossed + maxItems: 2 interrupt-names: minItems: 1 - items: - - const: uplow - - const: critical + maxItems: 2 nvmem-cells: minItems: 1 @@ -129,22 +131,64 @@ allOf: then: properties: interrupts: - maxItems: 1 + items: + - description: Combined interrupt if upper or lower threshold crossed interrupt-names: - maxItems: 1 + items: + - const: uplow - else: + - if: + properties: + compatible: + contains: + enum: + - qcom,msm8953-tsens + - qcom,msm8996-tsens + - qcom,msm8998-tsens + - qcom,sc7180-tsens + - qcom,sc7280-tsens + - qcom,sc8180x-tsens + - qcom,sc8280xp-tsens + - qcom,sdm630-tsens + - qcom,sdm845-tsens + - qcom,sm6350-tsens + - qcom,sm8150-tsens + - qcom,sm8250-tsens + - qcom,sm8350-tsens + - qcom,sm8450-tsens + - qcom,tsens-v2 + then: properties: interrupts: - minItems: 2 + items: + - description: Combined interrupt if upper or lower threshold crossed + - description: Interrupt if critical threshold crossed interrupt-names: - minItems: 2 + items: + - const: uplow + - const: critical - if: properties: compatible: contains: enum: + - qcom,ipq8074-tsens + then: + properties: + interrupts: + items: + - description: Combined interrupt if upper, lower or critical thresholds crossed + interrupt-names: + items: + - const: combined + + - if: + properties: + compatible: + contains: + enum: + - qcom,ipq8074-tsens - qcom,tsens-v0_1 - qcom,tsens-v1 - qcom,tsens-v2 @@ -227,4 +271,19 @@ examples: #qcom,sensors = <13>; #thermal-sensor-cells = <1>; }; + + - | + #include + // Example 4 (for any IPQ8074 based SoC-s): + tsens4: thermal-sensor@4a9000 { + compatible = "qcom,ipq8074-tsens"; + reg = <0x4a9000 0x1000>, + <0x4a8000 0x1000>; + + interrupts = ; + interrupt-names = "combined"; + + #qcom,sensors = <16>; + #thermal-sensor-cells = <1>; + }; ... diff --git a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml index 1d837339784817ea610767a467c73f9e0048f17f..03f4b926e53c96b253c8c75446e8f9f336e6f516 100644 --- a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml @@ -17,7 +17,7 @@ properties: compatible: items: - enum: - - renesas,r9a07g043-tsu # RZ/G2UL + - renesas,r9a07g043-tsu # RZ/G2UL and RZ/Five - renesas,r9a07g044-tsu # RZ/G2{L,LC} - renesas,r9a07g054-tsu # RZ/V2L - const: renesas,rzg2l-tsu diff --git a/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt b/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt deleted file mode 100644 index e136946a2f4fd9f42c2736a6e0926166bdc82e2c..0000000000000000000000000000000000000000 --- a/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt +++ /dev/null @@ -1,95 +0,0 @@ -General Purpose Analog To Digital Converter (ADC) based thermal sensor. - -On some of platforms, thermal sensor like thermistors are connected to -one of ADC channel and sensor resistance is read via voltage across the -sensor resistor. The voltage read across the sensor is mapped to -temperature using voltage-temperature lookup table. - -Required properties: -=================== -- compatible: Must be "generic-adc-thermal". -- #thermal-sensor-cells: Should be 1. See Documentation/devicetree/bindings/thermal/thermal-sensor.yaml for a description - of this property. -Optional properties: -=================== -- temperature-lookup-table: Two dimensional array of Integer; lookup table - to map the relation between ADC value and - temperature. When ADC is read, the value is - looked up on the table to get the equivalent - temperature. - - The first value of the each row of array is the - temperature in milliCelsius and second value of - the each row of array is the ADC read value. - - If not specified, driver assumes the ADC channel - gives milliCelsius directly. - -Example : -#include - -i2c@7000c400 { - ads1015: ads1015@4a { - reg = <0x4a>; - compatible = "ads1015"; - sampling-frequency = <3300>; - #io-channel-cells = <1>; - }; -}; - -tboard_thermistor: thermal-sensor { - compatible = "generic-adc-thermal"; - #thermal-sensor-cells = <0>; - io-channels = <&ads1015 1>; - io-channel-names = "sensor-channel"; - temperature-lookup-table = < (-40000) 2578 - (-39000) 2577 - (-38000) 2576 - (-37000) 2575 - (-36000) 2574 - (-35000) 2573 - (-34000) 2572 - (-33000) 2571 - (-32000) 2569 - (-31000) 2568 - (-30000) 2567 - :::::::::: - 118000 254 - 119000 247 - 120000 240 - 121000 233 - 122000 226 - 123000 220 - 124000 214 - 125000 208>; -}; - -dummy_cool_dev: dummy-cool-dev { - compatible = "dummy-cooling-dev"; - #cooling-cells = <2>; /* min followed by max */ -}; - -thermal-zones { - Tboard { - polling-delay = <15000>; /* milliseconds */ - polling-delay-passive = <0>; /* milliseconds */ - thermal-sensors = <&tboard_thermistor>; - - trips { - therm_est_trip: therm_est_trip { - temperature = <40000>; - type = "active"; - hysteresis = <1000>; - }; - }; - - cooling-maps { - map0 { - trip = <&therm_est_trip>; - cooling-device = <&dummy_cool_dev THERMAL_NO_LIMIT THERMAL_NO_LIMIT>; - contribution = <100>; - }; - - }; - }; -}; diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml index c74f124ebfc00120fce35961e994dbee19cfe5db..0509c9cec224deb3674e39dc60c2b245700f2c45 100644 --- a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml +++ b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml @@ -9,6 +9,19 @@ title: Texas Instruments J72XX VTM (DTS) binding maintainers: - Keerthy +description: | + The TI K3 family of SoCs typically have a Voltage & Thermal + Management (VTM) device to control up to 8 temperature diode + sensors to measure silicon junction temperatures from different + hotspots of the chip as well as provide temperature, interrupt + and alerting information. + + The following polynomial equation can then be used to convert + value returned by this device into a temperature in Celsius + + Temp(C) = (-9.2627e-12) * x^4 + (6.0373e-08) * x^3 + \ + (-1.7058e-04) * x^2 + (3.2512e-01) * x + (-4.9003e+01) + properties: compatible: enum: @@ -19,7 +32,12 @@ properties: items: - description: VTM cfg1 register space - description: VTM cfg2 register space - - description: VTM efuse register space + - description: | + A software trimming method must be applied to some Jacinto + devices to function properly. This eFuse region provides + the information needed for these SoCs to report + temperatures accurately. + minItems: 2 power-domains: maxItems: 1 @@ -27,6 +45,21 @@ properties: "#thermal-sensor-cells": const: 1 +allOf: + - if: + properties: + compatible: + contains: + const: ti,j721e-vtm + then: + properties: + reg: + minItems: 3 + else: + properties: + reg: + maxItems: 2 + required: - compatible - reg diff --git a/Documentation/driver-api/gpio/legacy.rst b/Documentation/driver-api/gpio/legacy.rst index 9b12eeb89170f3d9cfa0cb2347a33928d60bec5f..e17910cc3271937c198859a9adcea7c1f3842831 100644 --- a/Documentation/driver-api/gpio/legacy.rst +++ b/Documentation/driver-api/gpio/legacy.rst @@ -558,11 +558,6 @@ Platform Support To force-enable this framework, a platform's Kconfig will "select" GPIOLIB, else it is up to the user to configure support for GPIO. -It may also provide a custom value for ARCH_NR_GPIOS, so that it better -reflects the number of GPIOs in actual use on that platform, without -wasting static table space. (It should count both built-in/SoC GPIOs and -also ones on GPIO expanders. - If neither of these options are selected, the platform does not support GPIOs through GPIO-lib and the code cannot be enabled by the user. diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index 82b0be425775766c27516db1a7149eb863f02dc6..34647d9bdca46e2ce670dafb3af926b4768b6f7f 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -21,7 +21,7 @@ | openrisc: | TODO | | parisc: | TODO | | powerpc: | ok | - | riscv: | TODO | + | riscv: | ok | | s390: | TODO | | sh: | TODO | | sparc: | TODO | diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst index 17df9a02ccff14a17be195730ff5ce71ffeb79ae..220f3e0d3f559f47ab61b46b6e44c7754471b941 100644 --- a/Documentation/filesystems/f2fs.rst +++ b/Documentation/filesystems/f2fs.rst @@ -25,10 +25,14 @@ a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs). - git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git -For reporting bugs and sending patches, please use the following mailing list: +For sending patches, please use the following mailing list: - linux-f2fs-devel@lists.sourceforge.net +For reporting bugs, please use the following f2fs bug tracker link: + +- https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs + Background and Design issues ============================ @@ -154,6 +158,8 @@ nobarrier This option can be used if underlying storage guarantees If this option is set, no cache_flush commands are issued but f2fs still guarantees the write ordering of all the data writes. +barrier If this option is set, cache_flush commands are allowed to be + issued. fastboot This option is used when a system wants to reduce mount time as much as possible, even though normal performance can be sacrificed. @@ -199,6 +205,7 @@ fault_type=%d Support configuring fault injection type, should be FAULT_SLAB_ALLOC 0x000008000 FAULT_DQUOT_INIT 0x000010000 FAULT_LOCK_OP 0x000020000 + FAULT_BLKADDR 0x000040000 =================== =========== mode=%s Control block allocation mode which supports "adaptive" and "lfs". In "lfs" mode, there should be no random @@ -340,6 +347,10 @@ memory=%s Control memory mode. This supports "normal" and "low" modes. Because of the nature of low memory devices, in this mode, f2fs will try to save memory sometimes by sacrificing performance. "normal" mode is the default mode and same as before. +age_extent_cache Enable an age extent cache based on rb-tree. It records + data block update frequency of the extent per inode, in + order to provide better temperature hints for data block + allocation. ======================== ============================================================ Debugfs Entries diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst index dfe0ac5624fb240ce697a5067666cf46b2dc7209..07d5a5623e2a3e87e5244b94b789c8174a983efe 100644 --- a/Documentation/riscv/patch-acceptance.rst +++ b/Documentation/riscv/patch-acceptance.rst @@ -20,16 +20,22 @@ Submit Checklist Addendum ------------------------- We'll only accept patches for new modules or extensions if the specifications for those modules or extensions are listed as being -"Frozen" or "Ratified" by the RISC-V Foundation. (Developers may, of -course, maintain their own Linux kernel trees that contain code for -any draft extensions that they wish.) +unlikely to be incompatibly changed in the future. For +specifications from the RISC-V foundation this means "Frozen" or +"Ratified", for the UEFI forum specifications this means a published +ECR. (Developers may, of course, maintain their own Linux kernel trees +that contain code for any draft extensions that they wish.) -Additionally, the RISC-V specification allows implementors to create +Additionally, the RISC-V specification allows implementers to create their own custom extensions. These custom extensions aren't required to go through any review or ratification process by the RISC-V Foundation. To avoid the maintenance complexity and potential performance impact of adding kernel code for implementor-specific -RISC-V extensions, we'll only to accept patches for extensions that -have been officially frozen or ratified by the RISC-V Foundation. -(Implementors, may, of course, maintain their own Linux kernel trees -containing code for any custom extensions that they wish.) +RISC-V extensions, we'll only consider patches for extensions that either: + +- Have been officially frozen or ratified by the RISC-V Foundation, or +- Have been implemented in hardware that is widely available, per standard + Linux practice. + +(Implementers, may, of course, maintain their own Linux kernel trees containing +code for any custom extensions that they wish.) diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst index c78da9ce0ec44e5c9feca7ee262e31ed668b9d99..f16337bdb8520fac879bc4aee481bf7cfe8b0ce4 100644 --- a/Documentation/userspace-api/index.rst +++ b/Documentation/userspace-api/index.rst @@ -25,6 +25,7 @@ place where this information is gathered. ebpf/index ioctl/index iommu + iommufd media/index netlink/index sysfs-platform_profile diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index 5f81e2a24a5c04221e49af03fcdde934a35cc855..eb045fc495a4e3426604eedde78fd5656d6b6920 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -105,6 +105,7 @@ Code Seq# Include File Comments '8' all SNP8023 advanced NIC card ';' 64-7F linux/vfio.h +';' 80-FF linux/iommufd.h '=' 00-3f uapi/linux/ptp_clock.h '@' 00-0F linux/radeonfb.h conflict! '@' 00-0F drivers/video/aty/aty128fb.c conflict! diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst new file mode 100644 index 0000000000000000000000000000000000000000..79dd9eb515874d6fb6029b6ae848daf05ffeec81 --- /dev/null +++ b/Documentation/userspace-api/iommufd.rst @@ -0,0 +1,223 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +======= +IOMMUFD +======= + +:Author: Jason Gunthorpe +:Author: Kevin Tian + +Overview +======== + +IOMMUFD is the user API to control the IOMMU subsystem as it relates to managing +IO page tables from userspace using file descriptors. It intends to be general +and consumable by any driver that wants to expose DMA to userspace. These +drivers are eventually expected to deprecate any internal IOMMU logic +they may already/historically implement (e.g. vfio_iommu_type1.c). + +At minimum iommufd provides universal support of managing I/O address spaces and +I/O page tables for all IOMMUs, with room in the design to add non-generic +features to cater to specific hardware functionality. + +In this context the capital letter (IOMMUFD) refers to the subsystem while the +small letter (iommufd) refers to the file descriptors created via /dev/iommu for +use by userspace. + +Key Concepts +============ + +User Visible Objects +-------------------- + +Following IOMMUFD objects are exposed to userspace: + +- IOMMUFD_OBJ_IOAS, representing an I/O address space (IOAS), allowing map/unmap + of user space memory into ranges of I/O Virtual Address (IOVA). + + The IOAS is a functional replacement for the VFIO container, and like the VFIO + container it copies an IOVA map to a list of iommu_domains held within it. + +- IOMMUFD_OBJ_DEVICE, representing a device that is bound to iommufd by an + external driver. + +- IOMMUFD_OBJ_HW_PAGETABLE, representing an actual hardware I/O page table + (i.e. a single struct iommu_domain) managed by the iommu driver. + + The IOAS has a list of HW_PAGETABLES that share the same IOVA mapping and + it will synchronize its mapping with each member HW_PAGETABLE. + +All user-visible objects are destroyed via the IOMMU_DESTROY uAPI. + +The diagram below shows relationship between user-visible objects and kernel +datastructures (external to iommufd), with numbers referred to operations +creating the objects and links:: + + _________________________________________________________ + | iommufd | + | [1] | + | _________________ | + | | | | + | | | | + | | | | + | | | | + | | | | + | | | | + | | | [3] [2] | + | | | ____________ __________ | + | | IOAS |<--| |<------| | | + | | | |HW_PAGETABLE| | DEVICE | | + | | | |____________| |__________| | + | | | | | | + | | | | | | + | | | | | | + | | | | | | + | | | | | | + | |_________________| | | | + | | | | | + |_________|___________________|___________________|_______| + | | | + | _____v______ _______v_____ + | PFN storage | | | | + |------------>|iommu_domain| |struct device| + |____________| |_____________| + +1. IOMMUFD_OBJ_IOAS is created via the IOMMU_IOAS_ALLOC uAPI. An iommufd can + hold multiple IOAS objects. IOAS is the most generic object and does not + expose interfaces that are specific to single IOMMU drivers. All operations + on the IOAS must operate equally on each of the iommu_domains inside of it. + +2. IOMMUFD_OBJ_DEVICE is created when an external driver calls the IOMMUFD kAPI + to bind a device to an iommufd. The driver is expected to implement a set of + ioctls to allow userspace to initiate the binding operation. Successful + completion of this operation establishes the desired DMA ownership over the + device. The driver must also set the driver_managed_dma flag and must not + touch the device until this operation succeeds. + +3. IOMMUFD_OBJ_HW_PAGETABLE is created when an external driver calls the IOMMUFD + kAPI to attach a bound device to an IOAS. Similarly the external driver uAPI + allows userspace to initiate the attaching operation. If a compatible + pagetable already exists then it is reused for the attachment. Otherwise a + new pagetable object and iommu_domain is created. Successful completion of + this operation sets up the linkages among IOAS, device and iommu_domain. Once + this completes the device could do DMA. + + Every iommu_domain inside the IOAS is also represented to userspace as a + HW_PAGETABLE object. + + .. note:: + + Future IOMMUFD updates will provide an API to create and manipulate the + HW_PAGETABLE directly. + +A device can only bind to an iommufd due to DMA ownership claim and attach to at +most one IOAS object (no support of PASID yet). + +Kernel Datastructure +-------------------- + +User visible objects are backed by following datastructures: + +- iommufd_ioas for IOMMUFD_OBJ_IOAS. +- iommufd_device for IOMMUFD_OBJ_DEVICE. +- iommufd_hw_pagetable for IOMMUFD_OBJ_HW_PAGETABLE. + +Several terminologies when looking at these datastructures: + +- Automatic domain - refers to an iommu domain created automatically when + attaching a device to an IOAS object. This is compatible to the semantics of + VFIO type1. + +- Manual domain - refers to an iommu domain designated by the user as the + target pagetable to be attached to by a device. Though currently there are + no uAPIs to directly create such domain, the datastructure and algorithms + are ready for handling that use case. + +- In-kernel user - refers to something like a VFIO mdev that is using the + IOMMUFD access interface to access the IOAS. This starts by creating an + iommufd_access object that is similar to the domain binding a physical device + would do. The access object will then allow converting IOVA ranges into struct + page * lists, or doing direct read/write to an IOVA. + +iommufd_ioas serves as the metadata datastructure to manage how IOVA ranges are +mapped to memory pages, composed of: + +- struct io_pagetable holding the IOVA map +- struct iopt_area's representing populated portions of IOVA +- struct iopt_pages representing the storage of PFNs +- struct iommu_domain representing the IO page table in the IOMMU +- struct iopt_pages_access representing in-kernel users of PFNs +- struct xarray pinned_pfns holding a list of pages pinned by in-kernel users + +Each iopt_pages represents a logical linear array of full PFNs. The PFNs are +ultimately derived from userspace VAs via an mm_struct. Once they have been +pinned the PFNs are stored in IOPTEs of an iommu_domain or inside the pinned_pfns +xarray if they have been pinned through an iommufd_access. + +PFN have to be copied between all combinations of storage locations, depending +on what domains are present and what kinds of in-kernel "software access" users +exist. The mechanism ensures that a page is pinned only once. + +An io_pagetable is composed of iopt_areas pointing at iopt_pages, along with a +list of iommu_domains that mirror the IOVA to PFN map. + +Multiple io_pagetable-s, through their iopt_area-s, can share a single +iopt_pages which avoids multi-pinning and double accounting of page +consumption. + +iommufd_ioas is sharable between subsystems, e.g. VFIO and VDPA, as long as +devices managed by different subsystems are bound to a same iommufd. + +IOMMUFD User API +================ + +.. kernel-doc:: include/uapi/linux/iommufd.h + +IOMMUFD Kernel API +================== + +The IOMMUFD kAPI is device-centric with group-related tricks managed behind the +scene. This allows the external drivers calling such kAPI to implement a simple +device-centric uAPI for connecting its device to an iommufd, instead of +explicitly imposing the group semantics in its uAPI as VFIO does. + +.. kernel-doc:: drivers/iommu/iommufd/device.c + :export: + +.. kernel-doc:: drivers/iommu/iommufd/main.c + :export: + +VFIO and IOMMUFD +---------------- + +Connecting a VFIO device to iommufd can be done in two ways. + +First is a VFIO compatible way by directly implementing the /dev/vfio/vfio +container IOCTLs by mapping them into io_pagetable operations. Doing so allows +the use of iommufd in legacy VFIO applications by symlinking /dev/vfio/vfio to +/dev/iommufd or extending VFIO to SET_CONTAINER using an iommufd instead of a +container fd. + +The second approach directly extends VFIO to support a new set of device-centric +user API based on aforementioned IOMMUFD kernel API. It requires userspace +change but better matches the IOMMUFD API semantics and easier to support new +iommufd features when comparing it to the first approach. + +Currently both approaches are still work-in-progress. + +There are still a few gaps to be resolved to catch up with VFIO type1, as +documented in iommufd_vfio_check_extension(). + +Future TODOs +============ + +Currently IOMMUFD supports only kernel-managed I/O page table, similar to VFIO +type1. New features on the radar include: + + - Binding iommu_domain's to PASID/SSID + - Userspace page tables, for ARM, x86 and S390 + - Kernel bypass'd invalidation of user page tables + - Re-use of the KVM page table in the IOMMU + - Dirty page tracking in the IOMMU + - Runtime Increase/Decrease of IOPTE size + - PRI support with faults resolved in userspace diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 896914e3a8475a12fe3b841c736566726ba47157..0dd5d8733dd583ff77a4ef9072831e12e289a3b1 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -272,18 +272,6 @@ the VCPU file descriptor can be mmap-ed, including: KVM_CAP_DIRTY_LOG_RING, see section 8.3. -4.6 KVM_SET_MEMORY_REGION -------------------------- - -:Capability: basic -:Architectures: all -:Type: vm ioctl -:Parameters: struct kvm_memory_region (in) -:Returns: 0 on success, -1 on error - -This ioctl is obsolete and has been removed. - - 4.7 KVM_CREATE_VCPU ------------------- @@ -368,17 +356,6 @@ see the description of the capability. Note that the Xen shared info page, if configured, shall always be assumed to be dirty. KVM will not explicitly mark it such. -4.9 KVM_SET_MEMORY_ALIAS ------------------------- - -:Capability: basic -:Architectures: x86 -:Type: vm ioctl -:Parameters: struct kvm_memory_alias (in) -:Returns: 0 (success), -1 (error) - -This ioctl is obsolete and has been removed. - 4.10 KVM_RUN ------------ @@ -1332,7 +1309,7 @@ yet and must be cleared on entry. __u64 userspace_addr; /* start of the userspace allocated memory */ }; - /* for kvm_memory_region::flags */ + /* for kvm_userspace_memory_region::flags */ #define KVM_MEM_LOG_DIRTY_PAGES (1UL << 0) #define KVM_MEM_READONLY (1UL << 1) @@ -1377,10 +1354,6 @@ the memory region are automatically reflected into the guest. For example, an mmap() that affects the region will be made visible immediately. Another example is madvise(MADV_DROP). -It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl. -The KVM_SET_MEMORY_REGION does not allow fine grained control over memory -allocation and is deprecated. - 4.36 KVM_SET_TSS_ADDR --------------------- @@ -3293,6 +3266,7 @@ valid entries found. ---------------------- :Capability: KVM_CAP_DEVICE_CTRL +:Architectures: all :Type: vm ioctl :Parameters: struct kvm_create_device (in/out) :Returns: 0 on success, -1 on error @@ -3333,6 +3307,7 @@ number. :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device, KVM_CAP_VCPU_ATTRIBUTES for vcpu device KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set) +:Architectures: x86, arm64, s390 :Type: device ioctl, vm ioctl, vcpu ioctl :Parameters: struct kvm_device_attr :Returns: 0 on success, -1 on error @@ -4104,80 +4079,71 @@ flags values for ``struct kvm_msr_filter_range``: ``KVM_MSR_FILTER_READ`` Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap - indicates that a read should immediately fail, while a 1 indicates that - a read for a particular MSR should be handled regardless of the default + indicates that read accesses should be denied, while a 1 indicates that + a read for a particular MSR should be allowed regardless of the default filter action. ``KVM_MSR_FILTER_WRITE`` Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap - indicates that a write should immediately fail, while a 1 indicates that - a write for a particular MSR should be handled regardless of the default + indicates that write accesses should be denied, while a 1 indicates that + a write for a particular MSR should be allowed regardless of the default filter action. -``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE`` - - Filter both read and write accesses to MSRs using the given bitmap. A 0 - in the bitmap indicates that both reads and writes should immediately fail, - while a 1 indicates that reads and writes for a particular MSR are not - filtered by this range. - flags values for ``struct kvm_msr_filter``: ``KVM_MSR_FILTER_DEFAULT_ALLOW`` If no filter range matches an MSR index that is getting accessed, KVM will - fall back to allowing access to the MSR. + allow accesses to all MSRs by default. ``KVM_MSR_FILTER_DEFAULT_DENY`` If no filter range matches an MSR index that is getting accessed, KVM will - fall back to rejecting access to the MSR. In this mode, all MSRs that should - be processed by KVM need to explicitly be marked as allowed in the bitmaps. + deny accesses to all MSRs by default. + +This ioctl allows userspace to define up to 16 bitmaps of MSR ranges to deny +guest MSR accesses that would normally be allowed by KVM. If an MSR is not +covered by a specific range, the "default" filtering behavior applies. Each +bitmap range covers MSRs from [base .. base+nmsrs). -This ioctl allows user space to define up to 16 bitmaps of MSR ranges to -specify whether a certain MSR access should be explicitly filtered for or not. +If an MSR access is denied by userspace, the resulting KVM behavior depends on +whether or not KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER is +enabled. If KVM_MSR_EXIT_REASON_FILTER is enabled, KVM will exit to userspace +on denied accesses, i.e. userspace effectively intercepts the MSR access. If +KVM_MSR_EXIT_REASON_FILTER is not enabled, KVM will inject a #GP into the guest +on denied accesses. -If this ioctl has never been invoked, MSR accesses are not guarded and the -default KVM in-kernel emulation behavior is fully preserved. +If an MSR access is allowed by userspace, KVM will emulate and/or virtualize +the access in accordance with the vCPU model. Note, KVM may still ultimately +inject a #GP if an access is allowed by userspace, e.g. if KVM doesn't support +the MSR, or to follow architectural behavior for the MSR. + +By default, KVM operates in KVM_MSR_FILTER_DEFAULT_ALLOW mode with no MSR range +filters. Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes an error. -As soon as the filtering is in place, every MSR access is processed through -the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff); -x2APIC MSRs are always allowed, independent of the ``default_allow`` setting, -and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base -register. - .. warning:: - MSR accesses coming from nested vmentry/vmexit are not filtered. + MSR accesses as part of nested VM-Enter/VM-Exit are not filtered. This includes both writes to individual VMCS fields and reads/writes through the MSR lists pointed to by the VMCS. -If a bit is within one of the defined ranges, read and write accesses are -guarded by the bitmap's value for the MSR index if the kind of access -is included in the ``struct kvm_msr_filter_range`` flags. If no range -cover this particular access, the behavior is determined by the flags -field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW`` -and ``KVM_MSR_FILTER_DEFAULT_DENY``. - -Each bitmap range specifies a range of MSRs to potentially allow access on. -The range goes from MSR index [base .. base+nmsrs]. The flags field -indicates whether reads, writes or both reads and writes are filtered -by setting a 1 bit in the bitmap for the corresponding MSR index. - -If an MSR access is not permitted through the filtering, it generates a -#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that -allows user space to deflect and potentially handle various MSR accesses -into user space. + x2APIC MSR accesses cannot be filtered (KVM silently ignores filters that + cover any x2APIC MSRs). Note, invoking this ioctl while a vCPU is running is inherently racy. However, KVM does guarantee that vCPUs will see either the previous filter or the new filter, e.g. MSRs with identical settings in both the old and new filter will have deterministic behavior. +Similarly, if userspace wishes to intercept on denied accesses, +KVM_MSR_EXIT_REASON_FILTER must be enabled before activating any filters, and +left enabled until after all filters are deactivated. Failure to do so may +result in KVM injecting a #GP instead of exiting to userspace. + 4.98 KVM_CREATE_SPAPR_TCE_64 ---------------------------- @@ -5163,10 +5129,13 @@ KVM_PV_ENABLE ===== ============================= KVM_PV_DISABLE - Deregister the VM from the Ultravisor and reclaim the memory that - had been donated to the Ultravisor, making it usable by the kernel - again. All registered VCPUs are converted back to non-protected - ones. + Deregister the VM from the Ultravisor and reclaim the memory that had + been donated to the Ultravisor, making it usable by the kernel again. + All registered VCPUs are converted back to non-protected ones. If a + previous protected VM had been prepared for asynchonous teardown with + KVM_PV_ASYNC_CLEANUP_PREPARE and not subsequently torn down with + KVM_PV_ASYNC_CLEANUP_PERFORM, it will be torn down in this call + together with the current protected VM. KVM_PV_VM_SET_SEC_PARMS Pass the image header from VM memory to the Ultravisor in @@ -5289,6 +5258,36 @@ KVM_PV_DUMP authentication tag all of which are needed to decrypt the dump at a later time. +KVM_PV_ASYNC_CLEANUP_PREPARE + :Capability: KVM_CAP_S390_PROTECTED_ASYNC_DISABLE + + Prepare the current protected VM for asynchronous teardown. Most + resources used by the current protected VM will be set aside for a + subsequent asynchronous teardown. The current protected VM will then + resume execution immediately as non-protected. There can be at most + one protected VM prepared for asynchronous teardown at any time. If + a protected VM had already been prepared for teardown without + subsequently calling KVM_PV_ASYNC_CLEANUP_PERFORM, this call will + fail. In that case, the userspace process should issue a normal + KVM_PV_DISABLE. The resources set aside with this call will need to + be cleaned up with a subsequent call to KVM_PV_ASYNC_CLEANUP_PERFORM + or KVM_PV_DISABLE, otherwise they will be cleaned up when KVM + terminates. KVM_PV_ASYNC_CLEANUP_PREPARE can be called again as soon + as cleanup starts, i.e. before KVM_PV_ASYNC_CLEANUP_PERFORM finishes. + +KVM_PV_ASYNC_CLEANUP_PERFORM + :Capability: KVM_CAP_S390_PROTECTED_ASYNC_DISABLE + + Tear down the protected VM previously prepared for teardown with + KVM_PV_ASYNC_CLEANUP_PREPARE. The resources that had been set aside + will be freed during the execution of this command. This PV command + should ideally be issued by userspace from a separate thread. If a + fatal signal is received (or the process terminates naturally), the + command will terminate immediately without completing, and the normal + KVM shutdown procedure will take care of cleaning up all remaining + protected VMs, including the ones whose teardown was interrupted by + process termination. + 4.126 KVM_XEN_HVM_SET_ATTR -------------------------- @@ -5306,6 +5305,7 @@ KVM_PV_DUMP union { __u8 long_mode; __u8 vector; + __u8 runstate_update_flag; struct { __u64 gfn; } shared_info; @@ -5383,6 +5383,14 @@ KVM_XEN_ATTR_TYPE_XEN_VERSION event channel delivery, so responding within the kernel without exiting to userspace is beneficial. +KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG + This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates + support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the + XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read + other vCPUs' vcpu_runstate_info. Xen guests enable this feature via + the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist + hypercall. + 4.127 KVM_XEN_HVM_GET_ATTR -------------------------- @@ -6440,31 +6448,35 @@ if it decides to decode and emulate the instruction. Used on x86 systems. When the VM capability KVM_CAP_X86_USER_SPACE_MSR is enabled, MSR accesses to registers that would invoke a #GP by KVM kernel code -will instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR +may instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR exit for writes. -The "reason" field specifies why the MSR trap occurred. User space will only -receive MSR exit traps when a particular reason was requested during through +The "reason" field specifies why the MSR interception occurred. Userspace will +only receive MSR exits when a particular reason was requested during through ENABLE_CAP. Currently valid exit reasons are: - KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM - KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits - KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER +============================ ======================================== + KVM_MSR_EXIT_REASON_UNKNOWN access to MSR that is unknown to KVM + KVM_MSR_EXIT_REASON_INVAL access to invalid MSRs or reserved bits + KVM_MSR_EXIT_REASON_FILTER access blocked by KVM_X86_SET_MSR_FILTER +============================ ======================================== -For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest -wants to read. To respond to this request with a successful read, user space +For KVM_EXIT_X86_RDMSR, the "index" field tells userspace which MSR the guest +wants to read. To respond to this request with a successful read, userspace writes the respective data into the "data" field and must continue guest execution to ensure the read data is transferred into guest register state. -If the RDMSR request was unsuccessful, user space indicates that with a "1" in +If the RDMSR request was unsuccessful, userspace indicates that with a "1" in the "error" field. This will inject a #GP into the guest when the VCPU is executed again. -For KVM_EXIT_X86_WRMSR, the "index" field tells user space which MSR the guest -wants to write. Once finished processing the event, user space must continue -vCPU execution. If the MSR write was unsuccessful, user space also sets the +For KVM_EXIT_X86_WRMSR, the "index" field tells userspace which MSR the guest +wants to write. Once finished processing the event, userspace must continue +vCPU execution. If the MSR write was unsuccessful, userspace also sets the "error" field to "1". +See KVM_X86_SET_MSR_FILTER for details on the interaction with MSR filtering. + :: @@ -7229,19 +7241,29 @@ polling. :Parameters: args[0] contains the mask of KVM_MSR_EXIT_REASON_* events to report :Returns: 0 on success; -1 on error -This capability enables trapping of #GP invoking RDMSR and WRMSR instructions -into user space. +This capability allows userspace to intercept RDMSR and WRMSR instructions if +access to an MSR is denied. By default, KVM injects #GP on denied accesses. When a guest requests to read or write an MSR, KVM may not implement all MSRs that are relevant to a respective system. It also does not differentiate by CPU type. -To allow more fine grained control over MSR handling, user space may enable +To allow more fine grained control over MSR handling, userspace may enable this capability. With it enabled, MSR accesses that match the mask specified in -args[0] and trigger a #GP event inside the guest by KVM will instead trigger -KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space -can then handle to implement model specific MSR handling and/or user notifications -to inform a user that an MSR was not handled. +args[0] and would trigger a #GP inside the guest will instead trigger +KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications. Userspace +can then implement model specific MSR handling and/or user notifications +to inform a user that an MSR was not emulated/virtualized by KVM. + +The valid mask flags are: + +============================ =============================================== + KVM_MSR_EXIT_REASON_UNKNOWN intercept accesses to unknown (to KVM) MSRs + KVM_MSR_EXIT_REASON_INVAL intercept accesses that are architecturally + invalid according to the vCPU model and/or mode + KVM_MSR_EXIT_REASON_FILTER intercept accesses that are denied by userspace + via KVM_X86_SET_MSR_FILTER +============================ =============================================== 7.22 KVM_CAP_X86_BUS_LOCK_EXIT ------------------------------- @@ -7384,8 +7406,9 @@ hibernation of the host; however the VMM needs to manually save/restore the tags as appropriate if the VM is migrated. When this capability is enabled all memory in memslots must be mapped as -not-shareable (no MAP_SHARED), attempts to create a memslot with a -MAP_SHARED mmap will result in an -EINVAL return. +``MAP_ANONYMOUS`` or with a RAM-based file mapping (``tmpfs``, ``memfd``), +attempts to create a memslot with an invalid mmap will result in an +-EINVAL return. When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to perform a bulk copy of tags to/from the guest. @@ -7901,7 +7924,7 @@ KVM_EXIT_X86_WRMSR exit notifications. This capability indicates that KVM supports that accesses to user defined MSRs may be rejected. With this capability exposed, KVM exports new VM ioctl KVM_X86_SET_MSR_FILTER which user space can call to specify bitmaps of MSR -ranges that KVM should reject access to. +ranges that KVM should deny access to. In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to trap and emulate MSRs that are outside of the scope of KVM as well as @@ -7920,7 +7943,7 @@ regardless of what has actually been exposed through the CPUID leaf. 8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL ---------------------------------------------------------- -:Architectures: x86 +:Architectures: x86, arm64 :Parameters: args[0] - size of the dirty log ring KVM is capable of tracking dirty memory using ring buffers that are @@ -8002,13 +8025,6 @@ flushing is done by the KVM_GET_DIRTY_LOG ioctl). To achieve that, one needs to kick the vcpu out of KVM_RUN using a signal. The resulting vmexit ensures that all dirty GFNs are flushed to the dirty rings. -NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding -ioctl KVM_RESET_DIRTY_RINGS are mutual exclusive to the existing ioctls -KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. After enabling -KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual -machine will switch to ring-buffer dirty page tracking and further -KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail. - NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that should be exposed by weakly ordered architecture, in order to indicate the additional memory ordering requirements imposed on userspace when @@ -8017,6 +8033,33 @@ Architecture with TSO-like ordering (such as x86) are allowed to expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL to userspace. +After enabling the dirty rings, the userspace needs to detect the +capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the +ring structures can be backed by per-slot bitmaps. With this capability +advertised, it means the architecture can dirty guest pages without +vcpu/ring context, so that some of the dirty information will still be +maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP +can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL +hasn't been enabled, or any memslot has been existing. + +Note that the bitmap here is only a backup of the ring structure. The +use of the ring and bitmap combination is only beneficial if there is +only a very small amount of memory that is dirtied out of vcpu/ring +context. Otherwise, the stand-alone per-slot bitmap mechanism needs to +be considered. + +To collect dirty bits in the backup bitmap, userspace can use the same +KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all +the generation of the dirty bits is done in a single pass. Collecting +the dirty bitmap should be the very last thing that the VMM does before +considering the state as complete. VMM needs to ensure that the dirty +state is final and avoid missing dirty pages from another ioctl ordered +after the bitmap collection. + +NOTE: One example of using the backup bitmap is saving arm64 vgic/its +tables through KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} command on +KVM device "kvm-arm-vgic-its" when dirty ring is enabled. + 8.30 KVM_CAP_XEN_HVM -------------------- @@ -8025,12 +8068,13 @@ to userspace. This capability indicates the features that Xen supports for hosting Xen PVHVM guests. Valid flags are:: - #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) - #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) - #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) - #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) - #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) - #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) + #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) + #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) + #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + #define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3) + #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4) + #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5) + #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG (1 << 6) The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG ioctl is available, for the guest to set its hypercall page. @@ -8062,6 +8106,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes. related to event channel delivery, timers, and the XENVER_version interception. +The KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG flag indicates that KVM supports +the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute in the KVM_XEN_SET_ATTR +and KVM_XEN_GET_ATTR ioctls. This controls whether KVM will set the +XEN_RUNSTATE_UPDATE flag in guest memory mapped vcpu_runstate_info during +updates of the runstate information. Note that versions of KVM which support +the RUNSTATE feature above, but not thie RUNSTATE_UPDATE_FLAG feature, will +always set the XEN_RUNSTATE_UPDATE flag when updating the guest structure, +which is perhaps counterintuitive. When this flag is advertised, KVM will +behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless +specifically enabled (by the guest making the hypercall, causing the VMM +to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute). + 8.31 KVM_CAP_PPC_MULTITCE ------------------------- diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst index 392521af7c90562fa61d0245df8d94ce002d3c93..e88b34e586be29337f9b89460d3023459235b08a 100644 --- a/Documentation/virt/kvm/arm/pvtime.rst +++ b/Documentation/virt/kvm/arm/pvtime.rst @@ -23,21 +23,23 @@ the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1 ARCH_FEATURES mechanism before calling it. PV_TIME_FEATURES - ============= ======== ========== + + ============= ======== ================================================= Function ID: (uint32) 0xC5000020 PV_call_id: (uint32) The function to query for support. Currently only PV_TIME_ST is supported. Return value: (int64) NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant PV-time feature is supported by the hypervisor. - ============= ======== ========== + ============= ======== ================================================= PV_TIME_ST - ============= ======== ========== + + ============= ======== ============================================== Function ID: (uint32) 0xC5000021 Return value: (int64) IPA of the stolen time data structure for this VCPU. On failure: NOT_SUPPORTED (-1) - ============= ======== ========== + ============= ======== ============================================== The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory with inner and outer write back caching attributes, in the inner shareable @@ -76,5 +78,5 @@ It is advisable that one or more 64k pages are set aside for the purpose of these structures and not used for other purposes, this enables the guest to map the region using 64k pages and avoids conflicting attributes with other memory. -For the user space interface see Documentation/virt/kvm/devices/vcpu.rst -section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL". +For the user space interface see +:ref:`Documentation/virt/kvm/devices/vcpu.rst `. \ No newline at end of file diff --git a/Documentation/virt/kvm/devices/arm-vgic-its.rst b/Documentation/virt/kvm/devices/arm-vgic-its.rst index d257eddbae29688651073903b4b6cbdb83347658..e053124f77c48a7b9757c0d27fa9b02717cc8b33 100644 --- a/Documentation/virt/kvm/devices/arm-vgic-its.rst +++ b/Documentation/virt/kvm/devices/arm-vgic-its.rst @@ -52,7 +52,10 @@ KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEV_ARM_ITS_SAVE_TABLES save the ITS table data into guest RAM, at the location provisioned - by the guest in corresponding registers/table entries. + by the guest in corresponding registers/table entries. Should userspace + require a form of dirty tracking to identify which pages are modified + by the saving process, it should use a bitmap even if using another + mechanism to track the memory dirtied by the vCPUs. The layout of the tables in guest memory defines an ABI. The entries are laid out in little endian format as described in the last paragraph. diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst index 716aa3edae14fde2625025fe86d418020ef63695..31f14ec4a65b6a592f1cc36dfab05a0b14819fdc 100644 --- a/Documentation/virt/kvm/devices/vcpu.rst +++ b/Documentation/virt/kvm/devices/vcpu.rst @@ -171,6 +171,8 @@ configured values on other VCPUs. Userspace should configure the interrupt numbers on at least one VCPU after creating all VCPUs and before running any VCPUs. +.. _kvm_arm_vcpu_pvtime_ctrl: + 3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL ================================== diff --git a/MAINTAINERS b/MAINTAINERS index 384d8cfb9fdd0adcc212a644bdf7128f069ba17a..06a9a2981bf1ef903d3e6985aabece451e4062a8 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -312,6 +312,13 @@ L: linux-iio@vger.kernel.org S: Maintained F: drivers/counter/104-quad-8.c +ACCES IDIO-16 GPIO LIBRARY +M: William Breathitt Gray +L: linux-gpio@vger.kernel.org +S: Maintained +F: drivers/gpio/gpio-idio-16.c +F: drivers/gpio/gpio-idio-16.h + ACCES PCI-IDIO-16 GPIO DRIVER M: William Breathitt Gray L: linux-gpio@vger.kernel.org @@ -7889,6 +7896,7 @@ M: Chao Yu L: linux-f2fs-devel@lists.sourceforge.net S: Maintained W: https://f2fs.wiki.kernel.org/ +B: https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs T: git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git F: Documentation/ABI/testing/sysfs-fs-f2fs F: Documentation/filesystems/f2fs.rst @@ -8105,6 +8113,8 @@ S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening F: include/linux/fortify-string.h F: lib/fortify_kunit.c +F: lib/memcpy_kunit.c +F: lib/strscpy_kunit.c F: lib/test_fortify/* F: scripts/test_fortify.sh K: \b__NO_FORTIFY\b @@ -9263,6 +9273,7 @@ HISILICON GPIO DRIVER M: Jay Fang L: linux-gpio@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml F: drivers/gpio/gpio-hisi.c HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE) @@ -9279,6 +9290,7 @@ M: Yicong Yang L: linux-i2c@vger.kernel.org S: Maintained W: https://www.hisilicon.com +F: Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml F: drivers/i2c/busses/i2c-hisi.c HISILICON LPC BUS DRIVER @@ -9770,8 +9782,7 @@ F: Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml F: drivers/i3c/master/i3c-master-cdns.c I3C DRIVER FOR SYNOPSYS DESIGNWARE -M: Vitor Soares -S: Maintained +S: Orphan F: Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml F: drivers/i3c/master/dw* @@ -10800,6 +10811,18 @@ F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c F: include/linux/iova.h +IOMMUFD +M: Jason Gunthorpe +M: Kevin Tian +L: iommu@lists.linux.dev +S: Maintained +T: git git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git +F: Documentation/userspace-api/iommufd.rst +F: drivers/iommu/iommufd/ +F: include/linux/iommufd.h +F: include/uapi/linux/iommufd.h +F: tools/testing/selftests/iommu/ + IOMMU SUBSYSTEM M: Joerg Roedel M: Will Deacon @@ -11204,6 +11227,8 @@ M: Kees Cook L: linux-hardening@vger.kernel.org S: Supported T: git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening +F: Documentation/ABI/testing/sysfs-kernel-oops_count +F: Documentation/ABI/testing/sysfs-kernel-warn_count F: include/linux/overflow.h F: include/linux/randomize_kstack.h F: mm/usercopy.c @@ -11421,6 +11446,16 @@ F: arch/x86/kvm/svm/hyperv.* F: arch/x86/kvm/svm/svm_onhyperv.* F: arch/x86/kvm/vmx/evmcs.* +KVM X86 Xen (KVM/Xen) +M: David Woodhouse +M: Paul Durrant +M: Sean Christopherson +M: Paolo Bonzini +L: kvm@vger.kernel.org +S: Supported +T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git +F: arch/x86/kvm/xen.* + KERNFS M: Greg Kroah-Hartman M: Tejun Heo @@ -13801,6 +13836,15 @@ F: drivers/scsi/smartpqi/smartpqi*.[ch] F: include/linux/cciss*.h F: include/uapi/linux/cciss*.h +MICROSOFT MANA RDMA DRIVER +M: Long Li +M: Ajay Sharma +L: linux-rdma@vger.kernel.org +S: Supported +F: drivers/infiniband/hw/mana/ +F: include/net/mana +F: include/uapi/rdma/mana-abi.h + MICROSOFT SURFACE AGGREGATOR TABLET-MODE SWITCH M: Maximilian Luz L: platform-driver-x86@vger.kernel.org @@ -17924,6 +17968,13 @@ F: Documentation/ABI/*/sysfs-driver-hid-roccat* F: drivers/hid/hid-roccat* F: include/linux/hid-roccat* +ROCKCHIP CRYPTO DRIVERS +M: Corentin Labbe +L: linux-crypto@vger.kernel.org +S: Maintained +F: Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml +F: drivers/crypto/rockchip/ + ROCKCHIP I2S TDM DRIVER M: Nicolas Frattaroli L: linux-rockchip@lists.infradead.org @@ -19037,7 +19088,7 @@ M: Jason A. Donenfeld S: Maintained F: include/linux/siphash.h F: lib/siphash.c -F: lib/test_siphash.c +F: lib/siphash_kunit.c SIS 190 ETHERNET DRIVER M: Francois Romieu @@ -21738,7 +21789,7 @@ M: Alex Williamson R: Cornelia Huck L: kvm@vger.kernel.org S: Maintained -T: git git://github.com/awilliam/linux-vfio.git +T: git https://github.com/awilliam/linux-vfio.git F: Documentation/ABI/testing/sysfs-devices-vfio-dev F: Documentation/driver-api/vfio.rst F: drivers/vfio/ diff --git a/Makefile b/Makefile index 3327d5ba03b3048b18d6b317ad7f8360377350cb..cbc2b1984a18dc9765c6906b80f9ddda8e71cfaa 100644 --- a/Makefile +++ b/Makefile @@ -1032,8 +1032,8 @@ KBUILD_CFLAGS += $(CC_FLAGS_CFI) export CC_FLAGS_CFI endif -ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B -KBUILD_CFLAGS += -falign-functions=64 +ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0) +KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT) endif # arch Makefile may override CC so keep this after arch Makefile is included @@ -1146,7 +1146,7 @@ endif # We never want expected sections to be placed heuristically by the # linker. All sections should be explicitly named in the linker script. ifdef CONFIG_LD_ORPHAN_WARN -LDFLAGS_vmlinux += --orphan-handling=warn +LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif # Align the bit size of userspace programs with the kernel diff --git a/arch/Kconfig b/arch/Kconfig index 2d0e7099eb3ffa211a84f92030f46f13274b2c2d..a3c47c2a79cd4ee640feeb2f4dc698472171899c 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1438,4 +1438,28 @@ source "kernel/gcov/Kconfig" source "scripts/gcc-plugins/Kconfig" +config FUNCTION_ALIGNMENT_4B + bool + +config FUNCTION_ALIGNMENT_8B + bool + +config FUNCTION_ALIGNMENT_16B + bool + +config FUNCTION_ALIGNMENT_32B + bool + +config FUNCTION_ALIGNMENT_64B + bool + +config FUNCTION_ALIGNMENT + int + default 64 if FUNCTION_ALIGNMENT_64B + default 32 if FUNCTION_ALIGNMENT_32B + default 16 if FUNCTION_ALIGNMENT_16B + default 8 if FUNCTION_ALIGNMENT_8B + default 4 if FUNCTION_ALIGNMENT_4B + default 0 + endmenu diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 65d0daf6326218c1b0c8101aca08dadc6f5aa8ca..a0da424fc93aac9bc4ad732f3a0dcd04a1155d5a 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1158,27 +1158,6 @@ config ARM_PSCI 0022A ("Power State Coordination Interface System Software on ARM processors"). -# The GPIO number here must be sorted by descending number. In case of -# a multiplatform kernel, we just want the highest value required by the -# selected platforms. -config ARCH_NR_GPIO - int - default 2048 if ARCH_INTEL_SOCFPGA - default 1024 if ARCH_BRCMSTB || ARCH_RENESAS || ARCH_TEGRA || \ - ARCH_ZYNQ || ARCH_ASPEED - default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \ - SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210 - default 416 if ARCH_SUNXI - default 392 if ARCH_U8500 - default 352 if ARCH_VT8500 - default 288 if ARCH_ROCKCHIP - default 264 if MACH_H4700 - default 0 - help - Maximum number of GPIOs in the system. - - If unsure, leave the default value. - config HZ_FIXED int default 128 if SOC_AT91RM9200 diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 8c63f0a26f56b04c1b27b8b8ef597a59d6836eec..2ef651a78fa2a979cdf3712c01f8481b69789093 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -124,7 +124,7 @@ LDFLAGS_vmlinux += --no-undefined LDFLAGS_vmlinux += -X # Report orphan sections ifdef CONFIG_LD_ORPHAN_WARN -LDFLAGS_vmlinux += --orphan-handling=warn +LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif # Next argument is a linker script LDFLAGS_vmlinux += -T diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 3858c4d4cb98854d023075626878bc928062a42d..7b2b7d043d9be9e6f5c432badf2adb9938883eaf 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -18,7 +18,7 @@ config CRYPTO_GHASH_ARM_CE depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_CRYPTD - select CRYPTO_GF128MUL + select CRYPTO_LIB_GF128MUL help GCM GHASH function (NIST SP800-38D) diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c index 8cd00f56800e7f97574a7f7756c896f546c7284c..6dfaef2d8f913c539f484f00c7f08fe77232f85d 100644 --- a/arch/arm/crypto/aes-cipher-glue.c +++ b/arch/arm/crypto/aes-cipher-glue.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out); diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S index 434d80ab531c2a600fbcffc89c21e6a8ad5ef284..01620a0782ca93f2aa34730b526fe25f1c273e83 100644 --- a/arch/arm/crypto/nh-neon-core.S +++ b/arch/arm/crypto/nh-neon-core.S @@ -69,7 +69,7 @@ /* * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. */ diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c index ffa8d73fe722c231720e86d3d6aeb91ab283754b..e93e41ff265665a094efd8cc2c791bef5479e7dd 100644 --- a/arch/arm/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm/crypto/nhpoly1305-neon-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_neon(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_neon_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); kernel_neon_end(); src += n; srclen -= n; diff --git a/arch/arm/include/asm/gpio.h b/arch/arm/include/asm/gpio.h index f3bb8a2bf788eff111986bd8c2ae0a9f4da8aeda..4ebbb58f06ea62954d004f39562ef141e6f5c462 100644 --- a/arch/arm/include/asm/gpio.h +++ b/arch/arm/include/asm/gpio.h @@ -2,7 +2,6 @@ #ifndef _ARCH_ARM_GPIO_H #define _ARCH_ARM_GPIO_H -/* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */ #include /* The trivial gpiolib dispatchers */ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 7fc3457a8891c22d05ba23baa6ebf5b4c3bdb5cb..cf6d1cd8b6dc5dbc3b0797eeffeebc2e6c5f1350 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1988,6 +1988,7 @@ config ARM64_MTE depends on ARM64_PAN select ARCH_HAS_SUBPAGE_FAULTS select ARCH_USES_HIGH_VMA_FLAGS + select ARCH_USES_PG_ARCH_X help Memory Tagging (part of the ARMv8.5 Extensions) provides architectural support for run-time, always-on detection of @@ -2168,18 +2169,6 @@ config STACKPROTECTOR_PER_TASK def_bool y depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG -# The GPIO number here must be sorted by descending number. In case of -# a multiplatform kernel, we just want the highest value required by the -# selected platforms. -config ARCH_NR_GPIO - int - default 2048 if ARCH_APPLE - default 0 - help - Maximum number of GPIOs in the system. - - If unsure, leave the default value. - config UNWIND_PATCH_PAC_INTO_SCS bool "Enable shadow call stack dynamically using code patching" # needs Clang with https://reviews.llvm.org/D111780 incorporated diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index 8bd80508a710d1120afc320701111b8397f34f86..6d06b448a66e0ca6d62c728e04754ac2cbb65f6e 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -6,8 +6,8 @@ config CRYPTO_GHASH_ARM64_CE tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_GF128MUL select CRYPTO_LIB_AES + select CRYPTO_LIB_GF128MUL select CRYPTO_AEAD help GCM GHASH function (NIST SP800-38D) @@ -96,6 +96,17 @@ config CRYPTO_SHA3_ARM64 Architecture: arm64 using: - ARMv8.2 Crypto Extensions +config CRYPTO_SM3_NEON + tristate "Hash functions: SM3 (NEON)" + depends on KERNEL_MODE_NEON + select CRYPTO_HASH + select CRYPTO_SM3 + help + SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012) + + Architecture: arm64 using: + - NEON (Advanced SIMD) extensions + config CRYPTO_SM3_ARM64_CE tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON @@ -220,7 +231,7 @@ config CRYPTO_SM4_ARM64_CE - NEON (Advanced SIMD) extensions config CRYPTO_SM4_ARM64_CE_BLK - tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (ARMv8 Crypto Extensions)" + tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR/XTS (ARMv8 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_SM4 @@ -231,6 +242,8 @@ config CRYPTO_SM4_ARM64_CE_BLK - CBC (Cipher Block Chaining) mode (NIST SP800-38A) - CFB (Cipher Feedback) mode (NIST SP800-38A) - CTR (Counter) mode (NIST SP800-38A) + - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E + and IEEE 1619) Architecture: arm64 using: - ARMv8 Crypto Extensions @@ -268,6 +281,38 @@ config CRYPTO_AES_ARM64_CE_CCM - ARMv8 Crypto Extensions - NEON (Advanced SIMD) extensions +config CRYPTO_SM4_ARM64_CE_CCM + tristate "AEAD cipher: SM4 in CCM mode (ARMv8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AEAD + select CRYPTO_SM4 + select CRYPTO_SM4_ARM64_CE_BLK + help + AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with + CCM (Counter with Cipher Block Chaining-Message Authentication Code) + authenticated encryption mode (NIST SP800-38C) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions + - NEON (Advanced SIMD) extensions + +config CRYPTO_SM4_ARM64_CE_GCM + tristate "AEAD cipher: SM4 in GCM mode (ARMv8 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_AEAD + select CRYPTO_SM4 + select CRYPTO_SM4_ARM64_CE_BLK + help + AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with + GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D) + + Architecture: arm64 using: + - ARMv8 Crypto Extensions + - PMULL (Polynomial Multiply Long) instructions + - NEON (Advanced SIMD) extensions + config CRYPTO_CRCT10DIF_ARM64_CE tristate "CRCT10DIF (PMULL)" depends on KERNEL_MODE_NEON && CRC_T10DIF diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 24bb0c4610de24f52e06f2230a8d87da34eeee7e..4818e204c2aca2d28e65e38b290b5d9879991436 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -17,6 +17,9 @@ sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o +obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o +sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o + obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o @@ -26,6 +29,12 @@ sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o +sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o + +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_GCM) += sm4-ce-gcm.o +sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o + obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c index 56a5f6f0b0c123ac8668b0125b4b04bb9af052ed..e921823ca103a4e4081282f88a68fb9dfd09b72d 100644 --- a/arch/arm64/crypto/aes-ce-glue.c +++ b/arch/arm64/crypto/aes-ce-glue.c @@ -9,9 +9,9 @@ #include #include #include +#include #include #include -#include #include #include "aes-ce-setkey.h" diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c index 8caf6dfefce88ec91cd0b3f5e510a30ebcde7849..4ec55e568941c0306cbb1809334c4e10827968bf 100644 --- a/arch/arm64/crypto/aes-cipher-glue.c +++ b/arch/arm64/crypto/aes-cipher-glue.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds); diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index 5abc834271f4a61097e7a71d365d2fef8d51b12b..0e834a2c062cf2659ba974f0546ba416fad556d1 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -52,8 +52,7 @@ SYM_FUNC_END(aes_decrypt_block5x) */ AES_FUNC_START(aes_ecb_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 enc_prepare w3, x2, x5 @@ -77,14 +76,13 @@ ST5( st1 {v4.16b}, [x0], #16 ) subs w4, w4, #1 bne .Lecbencloop .Lecbencout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_FUNC_END(aes_ecb_encrypt) AES_FUNC_START(aes_ecb_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 dec_prepare w3, x2, x5 @@ -108,7 +106,7 @@ ST5( st1 {v4.16b}, [x0], #16 ) subs w4, w4, #1 bne .Lecbdecloop .Lecbdecout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_FUNC_END(aes_ecb_decrypt) @@ -171,9 +169,6 @@ AES_FUNC_END(aes_cbc_encrypt) AES_FUNC_END(aes_essiv_cbc_encrypt) AES_FUNC_START(aes_essiv_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - ld1 {cbciv.16b}, [x5] /* get iv */ mov w8, #14 /* AES-256: 14 rounds */ @@ -182,11 +177,9 @@ AES_FUNC_START(aes_essiv_cbc_decrypt) b .Lessivcbcdecstart AES_FUNC_START(aes_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - ld1 {cbciv.16b}, [x5] /* get iv */ .Lessivcbcdecstart: + frame_push 0 dec_prepare w3, x2, x6 .LcbcdecloopNx: @@ -236,7 +229,7 @@ ST5( st1 {v4.16b}, [x0], #16 ) bne .Lcbcdecloop .Lcbcdecout: st1 {cbciv.16b}, [x5] /* return iv */ - ldp x29, x30, [sp], #16 + frame_pop ret AES_FUNC_END(aes_cbc_decrypt) AES_FUNC_END(aes_essiv_cbc_decrypt) @@ -337,8 +330,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt) BLOCKS .req x13 BLOCKS_W .req w13 - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 enc_prepare ROUNDS_W, KEY, IV_PART ld1 {vctr.16b}, [IV] @@ -481,7 +473,7 @@ ST5( st1 {v4.16b}, [OUT], #16 ) .if !\xctr st1 {vctr.16b}, [IV] /* return next CTR value */ .endif - ldp x29, x30, [sp], #16 + frame_pop ret .Lctrtail\xctr: @@ -645,8 +637,7 @@ AES_FUNC_END(aes_xctr_encrypt) .endm AES_FUNC_START(aes_xts_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 ld1 {v4.16b}, [x6] xts_load_mask v8 @@ -704,7 +695,7 @@ AES_FUNC_START(aes_xts_encrypt) st1 {v0.16b}, [x0] .Lxtsencret: st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + frame_pop ret .LxtsencctsNx: @@ -732,8 +723,7 @@ AES_FUNC_START(aes_xts_encrypt) AES_FUNC_END(aes_xts_encrypt) AES_FUNC_START(aes_xts_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 0 /* subtract 16 bytes if we are doing CTS */ sub w8, w4, #0x10 @@ -794,7 +784,7 @@ AES_FUNC_START(aes_xts_decrypt) b .Lxtsdecloop .Lxtsdecout: st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + frame_pop ret .Lxtsdeccts: diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index d427f4556b6eb2d8ddd3afb7dcf577ff3e6882ff..7278a37c2d5cd0005f935810703bd94bcaa7ee32 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -760,7 +760,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) eor v6.16b, v6.16b, v31.16b eor v7.16b, v7.16b, v16.16b - stp q16, q17, [sp, #16] + stp q16, q17, [x6] mov bskey, x2 mov rounds, x3 @@ -768,8 +768,8 @@ SYM_FUNC_START_LOCAL(__xts_crypt8) SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - stp x29, x30, [sp, #-48]! - mov x29, sp + frame_push 0, 32 + add x6, sp, #.Lframe_local_offset ld1 {v25.16b}, [x5] @@ -781,7 +781,7 @@ SYM_FUNC_END(__xts_crypt8) eor v18.16b, \o2\().16b, v27.16b eor v19.16b, \o3\().16b, v28.16b - ldp q24, q25, [sp, #16] + ldp q24, q25, [x6] eor v20.16b, \o4\().16b, v29.16b eor v21.16b, \o5\().16b, v30.16b @@ -795,7 +795,7 @@ SYM_FUNC_END(__xts_crypt8) b.gt 0b st1 {v25.16b}, [x5] - ldp x29, x30, [sp], #48 + frame_pop ret .endm @@ -820,9 +820,7 @@ SYM_FUNC_END(aesbs_xts_decrypt) * int rounds, int blocks, u8 iv[]) */ SYM_FUNC_START(aesbs_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - + frame_push 0 ldp x7, x8, [x5] ld1 {v0.16b}, [x5] CPU_LE( rev x7, x7 ) @@ -862,6 +860,6 @@ CPU_LE( rev x8, x8 ) b.gt 0b st1 {v0.16b}, [x5] - ldp x29, x30, [sp], #16 + frame_pop ret SYM_FUNC_END(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S index dce6dcebfca189ee4f99d8c7ee03c00cc879f878..5604de61d06d04eeac8fb616859098b53a8cedd8 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -429,7 +429,7 @@ CPU_LE( ext v0.16b, v0.16b, v0.16b, #8 ) umov w0, v0.h[0] .ifc \p, p8 - ldp x29, x30, [sp], #16 + frame_pop .endif ret @@ -466,8 +466,7 @@ CPU_LE( ext v7.16b, v7.16b, v7.16b, #8 ) // Assumes len >= 16. // SYM_FUNC_START(crc_t10dif_pmull_p8) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 1 crc_t10dif_pmull p8 SYM_FUNC_END(crc_t10dif_pmull_p8) diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index ebe5558929b7bba68fa2802906fbf88a759de077..23ee9a5eaf27c23c5b30ead46d7761e1909cd4a5 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -436,9 +436,7 @@ SYM_FUNC_END(pmull_ghash_update_p8) .align 6 .macro pmull_gcm_do_crypt, enc - stp x29, x30, [sp, #-32]! - mov x29, sp - str x19, [sp, #24] + frame_push 1 load_round_keys x7, x6, x8 @@ -529,7 +527,7 @@ CPU_LE( rev w8, w8 ) .endif bne 0b -3: ldp x19, x10, [sp, #24] +3: ldr x10, [sp, #.Lframe_local_offset] cbz x10, 5f // output tag? ld1 {INP3.16b}, [x10] // load lengths[] @@ -562,7 +560,7 @@ CPU_LE( rev w8, w8 ) smov w0, v0.b[0] // return b0 .endif -4: ldp x29, x30, [sp], #32 +4: frame_pop ret 5: diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index 15794fe21a0b2eca5360bd6f793b6b4b7fb36181..e5e9adc1fcf4e0279e1a9e368d1c53e506aeccd8 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -508,7 +508,7 @@ static void __exit ghash_ce_mod_exit(void) crypto_unregister_shash(&ghash_alg); } -static const struct cpu_feature ghash_cpu_feature[] = { +static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = { { cpu_feature(PMULL) }, { } }; MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature); diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S index 51c0a534ef87ccf1083f48051b1e617913479c46..13eda08fda1e568d3f657f5128a58188a9f08b1a 100644 --- a/arch/arm64/crypto/nh-neon-core.S +++ b/arch/arm64/crypto/nh-neon-core.S @@ -8,6 +8,7 @@ */ #include +#include KEY .req x0 MESSAGE .req x1 @@ -58,11 +59,11 @@ /* * void nh_neon(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. */ -SYM_FUNC_START(nh_neon) +SYM_TYPED_FUNC_START(nh_neon) ld1 {K0.4s,K1.4s}, [KEY], #32 movi PASS0_SUMS.2d, #0 diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c index c5405e6a6db76f7cbbbccda47cc3a116c1c11e45..cd882c35d9252d8608d69e4fe0d9d6978681fb37 100644 --- a/arch/arm64/crypto/nhpoly1305-neon-glue.c +++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_neon(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_neon(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_neon_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_neon_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon); + crypto_nhpoly1305_update_helper(desc, src, n, nh_neon); kernel_neon_end(); src += n; srclen -= n; diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index ee98954ae8ca682651a00b6de2241bd61210813a..54bf6ebcfffb1567562af61f188c9bf6ccade166 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -84,7 +84,7 @@ static struct shash_alg sm3_alg = { .base.cra_driver_name = "sm3-ce", .base.cra_blocksize = SM3_BLOCK_SIZE, .base.cra_module = THIS_MODULE, - .base.cra_priority = 200, + .base.cra_priority = 400, }; static int __init sm3_ce_mod_init(void) diff --git a/arch/arm64/crypto/sm3-neon-core.S b/arch/arm64/crypto/sm3-neon-core.S new file mode 100644 index 0000000000000000000000000000000000000000..4357e0e51be3881bd44e7d5837294c222f0a787b --- /dev/null +++ b/arch/arm64/crypto/sm3-neon-core.S @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * sm3-neon-core.S - SM3 secure hash using NEON instructions + * + * Linux/arm64 port of the libgcrypt SM3 implementation for AArch64 + * + * Copyright (C) 2021 Jussi Kivilinna + * Copyright (c) 2022 Tianjia Zhang + */ + +#include +#include +#include + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 +#define state_h5 20 +#define state_h6 24 +#define state_h7 28 + +/* Stack structure */ + +#define STACK_W_SIZE (32 * 2 * 3) + +#define STACK_W (0) +#define STACK_SIZE (STACK_W + STACK_W_SIZE) + +/* Register macros */ + +#define RSTATE x0 +#define RDATA x1 +#define RNBLKS x2 +#define RKPTR x28 +#define RFRAME x29 + +#define ra w3 +#define rb w4 +#define rc w5 +#define rd w6 +#define re w7 +#define rf w8 +#define rg w9 +#define rh w10 + +#define t0 w11 +#define t1 w12 +#define t2 w13 +#define t3 w14 +#define t4 w15 +#define t5 w16 +#define t6 w17 + +#define k_even w19 +#define k_odd w20 + +#define addr0 x21 +#define addr1 x22 + +#define s0 w23 +#define s1 w24 +#define s2 w25 +#define s3 w26 + +#define W0 v0 +#define W1 v1 +#define W2 v2 +#define W3 v3 +#define W4 v4 +#define W5 v5 + +#define XTMP0 v6 +#define XTMP1 v7 +#define XTMP2 v16 +#define XTMP3 v17 +#define XTMP4 v18 +#define XTMP5 v19 +#define XTMP6 v20 + +/* Helper macros. */ + +#define _(...) /*_*/ + +#define clear_vec(x) \ + movi x.8h, #0; + +#define rolw(o, a, n) \ + ror o, a, #(32 - n); + +/* Round function macros. */ + +#define GG1_1(x, y, z, o, t) \ + eor o, x, y; +#define GG1_2(x, y, z, o, t) \ + eor o, o, z; +#define GG1_3(x, y, z, o, t) + +#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t) +#define FF1_2(x, y, z, o, t) +#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t) + +#define GG2_1(x, y, z, o, t) \ + bic o, z, x; +#define GG2_2(x, y, z, o, t) \ + and t, y, x; +#define GG2_3(x, y, z, o, t) \ + eor o, o, t; + +#define FF2_1(x, y, z, o, t) \ + eor o, x, y; +#define FF2_2(x, y, z, o, t) \ + and t, x, y; \ + and o, o, z; +#define FF2_3(x, y, z, o, t) \ + eor o, o, t; + +#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ + K_LOAD(round); \ + ldr t5, [sp, #(wtype##_W1_ADDR(round, widx))]; \ + rolw(t0, a, 12); /* rol(a, 12) => t0 */ \ + IOP(1, iop_param); \ + FF##i##_1(a, b, c, t1, t2); \ + ldr t6, [sp, #(wtype##_W1W2_ADDR(round, widx))]; \ + add k, k, e; \ + IOP(2, iop_param); \ + GG##i##_1(e, f, g, t3, t4); \ + FF##i##_2(a, b, c, t1, t2); \ + IOP(3, iop_param); \ + add k, k, t0; \ + add h, h, t5; \ + add d, d, t6; /* w1w2 + d => d */ \ + IOP(4, iop_param); \ + rolw(k, k, 7); /* rol (t0 + e + t), 7) => k */ \ + GG##i##_2(e, f, g, t3, t4); \ + add h, h, k; /* h + w1 + k => h */ \ + IOP(5, iop_param); \ + FF##i##_3(a, b, c, t1, t2); \ + eor t0, t0, k; /* k ^ t0 => t0 */ \ + GG##i##_3(e, f, g, t3, t4); \ + add d, d, t1; /* FF(a,b,c) + d => d */ \ + IOP(6, iop_param); \ + add t3, t3, h; /* GG(e,f,g) + h => t3 */ \ + rolw(b, b, 9); /* rol(b, 9) => b */ \ + eor h, t3, t3, ror #(32-9); \ + IOP(7, iop_param); \ + add d, d, t0; /* t0 + d => d */ \ + rolw(f, f, 19); /* rol(f, 19) => f */ \ + IOP(8, iop_param); \ + eor h, h, t3, ror #(32-17); /* P0(t3) => h */ + +#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ + R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param) + +#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \ + R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param) + +#define KL(round) \ + ldp k_even, k_odd, [RKPTR, #(4*(round))]; + +/* Input expansion macros. */ + +/* Byte-swapped input address. */ +#define IW_W_ADDR(round, widx, offs) \ + (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4)) + +/* Expanded input address. */ +#define XW_W_ADDR(round, widx, offs) \ + (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4)) + +/* Rounds 1-12, byte-swapped input block addresses. */ +#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 32) +#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48) + +/* Rounds 1-12, expanded input block addresses. */ +#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0) +#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16) + +/* Input block loading. + * Interleaving within round function needed for in-order CPUs. */ +#define LOAD_W_VEC_1_1() \ + add addr0, sp, #IW_W1_ADDR(0, 0); +#define LOAD_W_VEC_1_2() \ + add addr1, sp, #IW_W1_ADDR(4, 0); +#define LOAD_W_VEC_1_3() \ + ld1 {W0.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_4() \ + ld1 {W1.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_5() \ + ld1 {W2.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_6() \ + ld1 {W3.16b}, [RDATA], #16; +#define LOAD_W_VEC_1_7() \ + rev32 XTMP0.16b, W0.16b; +#define LOAD_W_VEC_1_8() \ + rev32 XTMP1.16b, W1.16b; +#define LOAD_W_VEC_2_1() \ + rev32 XTMP2.16b, W2.16b; +#define LOAD_W_VEC_2_2() \ + rev32 XTMP3.16b, W3.16b; +#define LOAD_W_VEC_2_3() \ + eor XTMP4.16b, XTMP1.16b, XTMP0.16b; +#define LOAD_W_VEC_2_4() \ + eor XTMP5.16b, XTMP2.16b, XTMP1.16b; +#define LOAD_W_VEC_2_5() \ + st1 {XTMP0.16b}, [addr0], #16; +#define LOAD_W_VEC_2_6() \ + st1 {XTMP4.16b}, [addr0]; \ + add addr0, sp, #IW_W1_ADDR(8, 0); +#define LOAD_W_VEC_2_7() \ + eor XTMP6.16b, XTMP3.16b, XTMP2.16b; +#define LOAD_W_VEC_2_8() \ + ext W0.16b, XTMP0.16b, XTMP0.16b, #8; /* W0: xx, w0, xx, xx */ +#define LOAD_W_VEC_3_1() \ + mov W2.16b, XTMP1.16b; /* W2: xx, w6, w5, w4 */ +#define LOAD_W_VEC_3_2() \ + st1 {XTMP1.16b}, [addr1], #16; +#define LOAD_W_VEC_3_3() \ + st1 {XTMP5.16b}, [addr1]; \ + ext W1.16b, XTMP0.16b, XTMP0.16b, #4; /* W1: xx, w3, w2, w1 */ +#define LOAD_W_VEC_3_4() \ + ext W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */ +#define LOAD_W_VEC_3_5() \ + ext W4.16b, XTMP2.16b, XTMP3.16b, #8; /* W4: xx, w12, w11, w10 */ +#define LOAD_W_VEC_3_6() \ + st1 {XTMP2.16b}, [addr0], #16; +#define LOAD_W_VEC_3_7() \ + st1 {XTMP6.16b}, [addr0]; +#define LOAD_W_VEC_3_8() \ + ext W5.16b, XTMP3.16b, XTMP3.16b, #4; /* W5: xx, w15, w14, w13 */ + +#define LOAD_W_VEC_1(iop_num, ...) \ + LOAD_W_VEC_1_##iop_num() +#define LOAD_W_VEC_2(iop_num, ...) \ + LOAD_W_VEC_2_##iop_num() +#define LOAD_W_VEC_3(iop_num, ...) \ + LOAD_W_VEC_3_##iop_num() + +/* Message scheduling. Note: 3 words per vector register. + * Interleaving within round function needed for in-order CPUs. */ +#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 16]) => XTMP0 */ \ + /* Load (w[i - 13]) => XTMP5 */ \ + ext XTMP0.16b, w0.16b, w0.16b, #12; /* XTMP0: w0, xx, xx, xx */ +#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP5.16b, w1.16b, w1.16b, #12; +#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */ +#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP5.16b, XTMP5.16b, w2.16b, #12; +#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 9] == w3 */ \ + /* W3 ^ XTMP0 => XTMP0 */ \ + eor XTMP0.16b, XTMP0.16b, w3.16b; +#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 3] == w5 */ \ + /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \ + /* rol(XTMP5, 7) => XTMP1 */ \ + add addr0, sp, #XW_W1_ADDR((round), 0); \ + shl XTMP2.4s, w5.4s, #15; +#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \ + shl XTMP1.4s, XTMP5.4s, #7; +#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \ + sri XTMP2.4s, w5.4s, #(32-15); +#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \ + sri XTMP1.4s, XTMP5.4s, #(32-7); +#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \ + eor XTMP0.16b, XTMP0.16b, XTMP2.16b; +#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 6] == W4 */ \ + /* W4 ^ XTMP1 => XTMP1 */ \ + eor XTMP1.16b, XTMP1.16b, w4.16b; +#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \ + /* P1(XTMP0) ^ XTMP1 => W0 */ \ + shl XTMP3.4s, XTMP0.4s, #15; +#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \ + shl XTMP4.4s, XTMP0.4s, #23; +#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \ + eor w0.16b, XTMP1.16b, XTMP0.16b; +#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \ + sri XTMP3.4s, XTMP0.4s, #(32-15); +#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \ + sri XTMP4.4s, XTMP0.4s, #(32-23); +#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \ + eor w0.16b, w0.16b, XTMP3.16b; +#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 3]) => XTMP2 */ \ + ext XTMP2.16b, w4.16b, w4.16b, #12; +#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \ + eor w0.16b, w0.16b, XTMP4.16b; +#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \ + ext XTMP2.16b, XTMP2.16b, w5.16b, #12; +#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \ + /* W1 ^ W2 => XTMP3 */ \ + eor XTMP3.16b, XTMP2.16b, w0.16b; +#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5) +#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \ + st1 {XTMP2.16b-XTMP3.16b}, [addr0]; +#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5) + +#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5) +#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5) +#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5) + +#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0) +#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0) +#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0) + +#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1) +#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1) +#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1) + +#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2) +#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2) +#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2) + +#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3) +#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3) +#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3) + +#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \ + SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4) +#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \ + SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4) +#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \ + SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4) + + + /* + * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'. + * + * void sm3_neon_transform(struct sm3_state *sst, u8 const *src, + * int blocks) + */ + .text +.align 3 +SYM_TYPED_FUNC_START(sm3_neon_transform) + ldp ra, rb, [RSTATE, #0] + ldp rc, rd, [RSTATE, #8] + ldp re, rf, [RSTATE, #16] + ldp rg, rh, [RSTATE, #24] + + stp x28, x29, [sp, #-16]! + stp x19, x20, [sp, #-16]! + stp x21, x22, [sp, #-16]! + stp x23, x24, [sp, #-16]! + stp x25, x26, [sp, #-16]! + mov RFRAME, sp + + sub addr0, sp, #STACK_SIZE + adr_l RKPTR, .LKtable + and sp, addr0, #(~63) + + /* Preload first block. */ + LOAD_W_VEC_1(1, 0) + LOAD_W_VEC_1(2, 0) + LOAD_W_VEC_1(3, 0) + LOAD_W_VEC_1(4, 0) + LOAD_W_VEC_1(5, 0) + LOAD_W_VEC_1(6, 0) + LOAD_W_VEC_1(7, 0) + LOAD_W_VEC_1(8, 0) + LOAD_W_VEC_2(1, 0) + LOAD_W_VEC_2(2, 0) + LOAD_W_VEC_2(3, 0) + LOAD_W_VEC_2(4, 0) + LOAD_W_VEC_2(5, 0) + LOAD_W_VEC_2(6, 0) + LOAD_W_VEC_2(7, 0) + LOAD_W_VEC_2(8, 0) + LOAD_W_VEC_3(1, 0) + LOAD_W_VEC_3(2, 0) + LOAD_W_VEC_3(3, 0) + LOAD_W_VEC_3(4, 0) + LOAD_W_VEC_3(5, 0) + LOAD_W_VEC_3(6, 0) + LOAD_W_VEC_3(7, 0) + LOAD_W_VEC_3(8, 0) + +.balign 16 +.Loop: + /* Transform 0-3 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 1, 1, IW, _, 0) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0) + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 3, 3, IW, _, 0) + + /* Transform 4-7 + Precalc 12-14 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 5, 1, IW, _, 0) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12) + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12) + + /* Transform 8-11 + Precalc 12-17 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15) + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15) + + /* Transform 12-14 + Precalc 18-20 */ + R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18) + R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18) + R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18) + + /* Transform 15-17 + Precalc 21-23 */ + R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21) + + /* Transform 18-20 + Precalc 24-26 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24) + + /* Transform 21-23 + Precalc 27-29 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27) + + /* Transform 24-26 + Precalc 30-32 */ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30) + + /* Transform 27-29 + Precalc 33-35 */ + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33) + + /* Transform 30-32 + Precalc 36-38 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36) + + /* Transform 33-35 + Precalc 39-41 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39) + + /* Transform 36-38 + Precalc 42-44 */ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42) + + /* Transform 39-41 + Precalc 45-47 */ + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45) + + /* Transform 42-44 + Precalc 48-50 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48) + + /* Transform 45-47 + Precalc 51-53 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51) + + /* Transform 48-50 + Precalc 54-56 */ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54) + + /* Transform 51-53 + Precalc 57-59 */ + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57) + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57) + + /* Transform 54-56 + Precalc 60-62 */ + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60) + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60) + + /* Transform 57-59 + Precalc 63 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63) + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63) + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63) + + /* Transform 60 */ + R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _) + subs RNBLKS, RNBLKS, #1 + b.eq .Lend + + /* Transform 61-63 + Preload next block */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, LOAD_W_VEC_1, _) + ldp s0, s1, [RSTATE, #0] + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _) + ldp s2, s3, [RSTATE, #8] + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, LOAD_W_VEC_3, _) + + /* Update the chaining variables. */ + eor ra, ra, s0 + eor rb, rb, s1 + ldp s0, s1, [RSTATE, #16] + eor rc, rc, s2 + ldp k_even, k_odd, [RSTATE, #24] + eor rd, rd, s3 + eor re, re, s0 + stp ra, rb, [RSTATE, #0] + eor rf, rf, s1 + stp rc, rd, [RSTATE, #8] + eor rg, rg, k_even + stp re, rf, [RSTATE, #16] + eor rh, rh, k_odd + stp rg, rh, [RSTATE, #24] + b .Loop + +.Lend: + /* Transform 61-63 */ + R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd, _, 61, 1, XW, _, _) + ldp s0, s1, [RSTATE, #0] + R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _) + ldp s2, s3, [RSTATE, #8] + R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd, _, 63, 0, XW, _, _) + + /* Update the chaining variables. */ + eor ra, ra, s0 + clear_vec(W0) + eor rb, rb, s1 + clear_vec(W1) + ldp s0, s1, [RSTATE, #16] + clear_vec(W2) + eor rc, rc, s2 + clear_vec(W3) + ldp k_even, k_odd, [RSTATE, #24] + clear_vec(W4) + eor rd, rd, s3 + clear_vec(W5) + eor re, re, s0 + clear_vec(XTMP0) + stp ra, rb, [RSTATE, #0] + clear_vec(XTMP1) + eor rf, rf, s1 + clear_vec(XTMP2) + stp rc, rd, [RSTATE, #8] + clear_vec(XTMP3) + eor rg, rg, k_even + clear_vec(XTMP4) + stp re, rf, [RSTATE, #16] + clear_vec(XTMP5) + eor rh, rh, k_odd + clear_vec(XTMP6) + stp rg, rh, [RSTATE, #24] + + /* Clear message expansion area */ + add addr0, sp, #STACK_W + st1 {W0.16b-W3.16b}, [addr0], #64 + st1 {W0.16b-W3.16b}, [addr0], #64 + st1 {W0.16b-W3.16b}, [addr0] + + mov sp, RFRAME + + ldp x25, x26, [sp], #16 + ldp x23, x24, [sp], #16 + ldp x21, x22, [sp], #16 + ldp x19, x20, [sp], #16 + ldp x28, x29, [sp], #16 + + ret +SYM_FUNC_END(sm3_neon_transform) + + + .section ".rodata", "a" + + .align 4 +.LKtable: + .long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb + .long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc + .long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce + .long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6 + .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c + .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce + .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec + .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 + .long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53 + .long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d + .long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4 + .long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43 + .long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c + .long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce + .long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec + .long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5 diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c new file mode 100644 index 0000000000000000000000000000000000000000..7182ee683f14ace6ea8c23ec9af0ab40fd3db97c --- /dev/null +++ b/arch/arm64/crypto/sm3-neon-glue.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * sm3-neon-glue.c - SM3 secure hash using NEON instructions + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src, + int blocks); + +static int sm3_neon_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + if (!crypto_simd_usable()) { + sm3_update(shash_desc_ctx(desc), data, len); + return 0; + } + + kernel_neon_begin(); + sm3_base_do_update(desc, data, len, sm3_neon_transform); + kernel_neon_end(); + + return 0; +} + +static int sm3_neon_final(struct shash_desc *desc, u8 *out) +{ + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } + + kernel_neon_begin(); + sm3_base_do_finalize(desc, sm3_neon_transform); + kernel_neon_end(); + + return sm3_base_finish(desc, out); +} + +static int sm3_neon_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + sm3_final(sctx, out); + return 0; + } + + kernel_neon_begin(); + if (len) + sm3_base_do_update(desc, data, len, sm3_neon_transform); + sm3_base_do_finalize(desc, sm3_neon_transform); + kernel_neon_end(); + + return sm3_base_finish(desc, out); +} + +static struct shash_alg sm3_alg = { + .digestsize = SM3_DIGEST_SIZE, + .init = sm3_base_init, + .update = sm3_neon_update, + .final = sm3_neon_final, + .finup = sm3_neon_finup, + .descsize = sizeof(struct sm3_state), + .base.cra_name = "sm3", + .base.cra_driver_name = "sm3-neon", + .base.cra_blocksize = SM3_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, + .base.cra_priority = 200, +}; + +static int __init sm3_neon_init(void) +{ + return crypto_register_shash(&sm3_alg); +} + +static void __exit sm3_neon_fini(void) +{ + crypto_unregister_shash(&sm3_alg); +} + +module_init(sm3_neon_init); +module_exit(sm3_neon_fini); + +MODULE_DESCRIPTION("SM3 secure hash using NEON instructions"); +MODULE_AUTHOR("Jussi Kivilinna "); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/sm4-ce-asm.h b/arch/arm64/crypto/sm4-ce-asm.h new file mode 100644 index 0000000000000000000000000000000000000000..7ea98e42e779cd2d586eb99a07db9168d4c52b24 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-asm.h @@ -0,0 +1,209 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 helper macros for Crypto Extensions + * Copyright (C) 2022 Tianjia Zhang + */ + +#define SM4_PREPARE(ptr) \ + ld1 {v24.16b-v27.16b}, [ptr], #64; \ + ld1 {v28.16b-v31.16b}, [ptr]; + +#define SM4_CRYPT_BLK_BE(b0) \ + sm4e b0.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + rev32 b0.16b, b0.16b; + +#define SM4_CRYPT_BLK(b0) \ + rev32 b0.16b, b0.16b; \ + SM4_CRYPT_BLK_BE(b0); + +#define SM4_CRYPT_BLK2_BE(b0, b1) \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + +#define SM4_CRYPT_BLK2(b0, b1) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + SM4_CRYPT_BLK2_BE(b0, b1); + +#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; + +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + SM4_CRYPT_BLK4_BE(b0, b1, b2, b3); + +#define SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7) \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + sm4e b3.4s, v24.4s; \ + sm4e b4.4s, v24.4s; \ + sm4e b5.4s, v24.4s; \ + sm4e b6.4s, v24.4s; \ + sm4e b7.4s, v24.4s; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + sm4e b3.4s, v25.4s; \ + sm4e b4.4s, v25.4s; \ + sm4e b5.4s, v25.4s; \ + sm4e b6.4s, v25.4s; \ + sm4e b7.4s, v25.4s; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + sm4e b3.4s, v26.4s; \ + sm4e b4.4s, v26.4s; \ + sm4e b5.4s, v26.4s; \ + sm4e b6.4s, v26.4s; \ + sm4e b7.4s, v26.4s; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + sm4e b3.4s, v27.4s; \ + sm4e b4.4s, v27.4s; \ + sm4e b5.4s, v27.4s; \ + sm4e b6.4s, v27.4s; \ + sm4e b7.4s, v27.4s; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + sm4e b3.4s, v28.4s; \ + sm4e b4.4s, v28.4s; \ + sm4e b5.4s, v28.4s; \ + sm4e b6.4s, v28.4s; \ + sm4e b7.4s, v28.4s; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + sm4e b3.4s, v29.4s; \ + sm4e b4.4s, v29.4s; \ + sm4e b5.4s, v29.4s; \ + sm4e b6.4s, v29.4s; \ + sm4e b7.4s, v29.4s; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + sm4e b3.4s, v30.4s; \ + sm4e b4.4s, v30.4s; \ + sm4e b5.4s, v30.4s; \ + sm4e b6.4s, v30.4s; \ + sm4e b7.4s, v30.4s; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + sm4e b3.4s, v31.4s; \ + sm4e b4.4s, v31.4s; \ + sm4e b5.4s, v31.4s; \ + sm4e b6.4s, v31.4s; \ + sm4e b7.4s, v31.4s; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + rev64 b3.4s, b3.4s; \ + rev64 b4.4s, b4.4s; \ + rev64 b5.4s, b5.4s; \ + rev64 b6.4s, b6.4s; \ + rev64 b7.4s, b7.4s; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + ext b3.16b, b3.16b, b3.16b, #8; \ + ext b4.16b, b4.16b, b4.16b, #8; \ + ext b5.16b, b5.16b, b5.16b, #8; \ + ext b6.16b, b6.16b, b6.16b, #8; \ + ext b7.16b, b7.16b, b7.16b, #8; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; + +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + rev32 b4.16b, b4.16b; \ + rev32 b5.16b, b5.16b; \ + rev32 b6.16b, b6.16b; \ + rev32 b7.16b, b7.16b; \ + SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7); diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S new file mode 100644 index 0000000000000000000000000000000000000000..028207c4afd0f76037336cf030375dd9b3ab4011 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-ccm-core.S @@ -0,0 +1,328 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include "sm4-ce-asm.h" + +.arch armv8-a+crypto + +.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31 + .set .Lv\b\().4s, \b +.endr + +.macro sm4e, vd, vn + .inst 0xcec08400 | (.L\vn << 5) | .L\vd +.endm + +/* Register macros */ + +#define RMAC v16 + +/* Helper macros. */ + +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + rev64 vctr.16b, vctr.16b; \ + adc x7, x7, xzr; + + +.align 3 +SYM_FUNC_START(sm4_ce_cbcmac_update) + /* input: + * x0: round key array, CTX + * x1: mac + * x2: src + * w3: nblocks + */ + SM4_PREPARE(x0) + + ld1 {RMAC.16b}, [x1] + +.Lcbcmac_loop_4x: + cmp w3, #4 + blt .Lcbcmac_loop_1x + + sub w3, w3, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v1.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v2.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v3.16b + + cbz w3, .Lcbcmac_end + b .Lcbcmac_loop_4x + +.Lcbcmac_loop_1x: + sub w3, w3, #1 + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v0.16b + + cbnz w3, .Lcbcmac_loop_1x + +.Lcbcmac_end: + st1 {RMAC.16b}, [x1] + ret +SYM_FUNC_END(sm4_ce_cbcmac_update) + +.align 3 +SYM_FUNC_START(sm4_ce_ccm_final) + /* input: + * x0: round key array, CTX + * x1: ctr0 (big endian, 128 bit) + * x2: mac + */ + SM4_PREPARE(x0) + + ld1 {RMAC.16b}, [x2] + ld1 {v0.16b}, [x1] + + SM4_CRYPT_BLK2(RMAC, v0) + + /* en-/decrypt the mac with ctr0 */ + eor RMAC.16b, RMAC.16b, v0.16b + st1 {RMAC.16b}, [x2] + + ret +SYM_FUNC_END(sm4_ce_ccm_final) + +.align 3 +SYM_FUNC_START(sm4_ce_ccm_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: mac + */ + SM4_PREPARE(x0) + + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 + + ld1 {RMAC.16b}, [x5] + +.Lccm_enc_loop_4x: + cmp w4, #(4 * 16) + blt .Lccm_enc_loop_1x + + sub w4, w4, #(4 * 16) + + /* construct CTRs */ + inc_le128(v8) /* +0 */ + inc_le128(v9) /* +1 */ + inc_le128(v10) /* +2 */ + inc_le128(v11) /* +3 */ + + ld1 {v0.16b-v3.16b}, [x2], #64 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK2(v9, RMAC) + eor v9.16b, v9.16b, v1.16b + eor RMAC.16b, RMAC.16b, v1.16b + SM4_CRYPT_BLK2(v10, RMAC) + eor v10.16b, v10.16b, v2.16b + eor RMAC.16b, RMAC.16b, v2.16b + SM4_CRYPT_BLK2(v11, RMAC) + eor v11.16b, v11.16b, v3.16b + eor RMAC.16b, RMAC.16b, v3.16b + + st1 {v8.16b-v11.16b}, [x1], #64 + + cbz w4, .Lccm_enc_end + b .Lccm_enc_loop_4x + +.Lccm_enc_loop_1x: + cmp w4, #16 + blt .Lccm_enc_tail + + sub w4, w4, #16 + + /* construct CTRs */ + inc_le128(v8) + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor RMAC.16b, RMAC.16b, v0.16b + + st1 {v8.16b}, [x1], #16 + + cbz w4, .Lccm_enc_end + b .Lccm_enc_loop_1x + +.Lccm_enc_tail: + /* construct CTRs */ + inc_le128(v8) + + SM4_CRYPT_BLK2(RMAC, v8) + + /* store new MAC */ + st1 {RMAC.16b}, [x5] + +.Lccm_enc_tail_loop: + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w9, v8.b[0] /* get top crypted CTR byte */ + umov w6, RMAC.b[0] /* get top MAC byte */ + + eor w9, w9, w0 /* w9 = CTR ^ input */ + eor w6, w6, w0 /* w6 = MAC ^ input */ + + strb w9, [x1], #1 /* store out byte */ + strb w6, [x5], #1 /* store MAC byte */ + + subs w4, w4, #1 + beq .Lccm_enc_ret + + /* shift out one byte */ + ext RMAC.16b, RMAC.16b, RMAC.16b, #1 + ext v8.16b, v8.16b, v8.16b, #1 + + b .Lccm_enc_tail_loop + +.Lccm_enc_end: + /* store new MAC */ + st1 {RMAC.16b}, [x5] + + /* store new CTR */ + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] + +.Lccm_enc_ret: + ret +SYM_FUNC_END(sm4_ce_ccm_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_ccm_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: mac + */ + SM4_PREPARE(x0) + + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 + + ld1 {RMAC.16b}, [x5] + +.Lccm_dec_loop_4x: + cmp w4, #(4 * 16) + blt .Lccm_dec_loop_1x + + sub w4, w4, #(4 * 16) + + /* construct CTRs */ + inc_le128(v8) /* +0 */ + inc_le128(v9) /* +1 */ + inc_le128(v10) /* +2 */ + inc_le128(v11) /* +3 */ + + ld1 {v0.16b-v3.16b}, [x2], #64 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor RMAC.16b, RMAC.16b, v8.16b + SM4_CRYPT_BLK2(v9, RMAC) + eor v9.16b, v9.16b, v1.16b + eor RMAC.16b, RMAC.16b, v9.16b + SM4_CRYPT_BLK2(v10, RMAC) + eor v10.16b, v10.16b, v2.16b + eor RMAC.16b, RMAC.16b, v10.16b + SM4_CRYPT_BLK2(v11, RMAC) + eor v11.16b, v11.16b, v3.16b + eor RMAC.16b, RMAC.16b, v11.16b + + st1 {v8.16b-v11.16b}, [x1], #64 + + cbz w4, .Lccm_dec_end + b .Lccm_dec_loop_4x + +.Lccm_dec_loop_1x: + cmp w4, #16 + blt .Lccm_dec_tail + + sub w4, w4, #16 + + /* construct CTRs */ + inc_le128(v8) + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK2(v8, RMAC) + eor v8.16b, v8.16b, v0.16b + eor RMAC.16b, RMAC.16b, v8.16b + + st1 {v8.16b}, [x1], #16 + + cbz w4, .Lccm_dec_end + b .Lccm_dec_loop_1x + +.Lccm_dec_tail: + /* construct CTRs */ + inc_le128(v8) + + SM4_CRYPT_BLK2(RMAC, v8) + + /* store new MAC */ + st1 {RMAC.16b}, [x5] + +.Lccm_dec_tail_loop: + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w9, v8.b[0] /* get top crypted CTR byte */ + umov w6, RMAC.b[0] /* get top MAC byte */ + + eor w9, w9, w0 /* w9 = CTR ^ input */ + eor w6, w6, w9 /* w6 = MAC ^ output */ + + strb w9, [x1], #1 /* store out byte */ + strb w6, [x5], #1 /* store MAC byte */ + + subs w4, w4, #1 + beq .Lccm_dec_ret + + /* shift out one byte */ + ext RMAC.16b, RMAC.16b, RMAC.16b, #1 + ext v8.16b, v8.16b, v8.16b, #1 + + b .Lccm_dec_tail_loop + +.Lccm_dec_end: + /* store new MAC */ + st1 {RMAC.16b}, [x5] + + /* store new CTR */ + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] + +.Lccm_dec_ret: + ret +SYM_FUNC_END(sm4_ce_ccm_dec) diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c new file mode 100644 index 0000000000000000000000000000000000000000..f2cec7b52efcd15f9d1dd59194384524e1a0d6e1 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c @@ -0,0 +1,303 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sm4-ce.h" + +asmlinkage void sm4_ce_cbcmac_update(const u32 *rkey_enc, u8 *mac, + const u8 *src, unsigned int nblocks); +asmlinkage void sm4_ce_ccm_enc(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes, u8 *mac); +asmlinkage void sm4_ce_ccm_dec(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes, u8 *mac); +asmlinkage void sm4_ce_ccm_final(const u32 *rkey_enc, u8 *iv, u8 *mac); + + +static int ccm_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_ctx *ctx = crypto_aead_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); + + return 0; +} + +static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + if ((authsize & 1) || authsize < 4) + return -EINVAL; + return 0; +} + +static int ccm_format_input(u8 info[], struct aead_request *req, + unsigned int msglen) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + unsigned int l = req->iv[0] + 1; + unsigned int m; + __be32 len; + + /* verify that CCM dimension 'L': 2 <= L <= 8 */ + if (l < 2 || l > 8) + return -EINVAL; + if (l < 4 && msglen >> (8 * l)) + return -EOVERFLOW; + + memset(&req->iv[SM4_BLOCK_SIZE - l], 0, l); + + memcpy(info, req->iv, SM4_BLOCK_SIZE); + + m = crypto_aead_authsize(aead); + + /* format flags field per RFC 3610/NIST 800-38C */ + *info |= ((m - 2) / 2) << 3; + if (req->assoclen) + *info |= (1 << 6); + + /* + * format message length field, + * Linux uses a u32 type to represent msglen + */ + if (l >= 4) + l = 4; + + len = cpu_to_be32(msglen); + memcpy(&info[SM4_BLOCK_SIZE - l], (u8 *)&len + 4 - l, l); + + return 0; +} + +static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_ctx *ctx = crypto_aead_ctx(aead); + struct __packed { __be16 l; __be32 h; } aadlen; + u32 assoclen = req->assoclen; + struct scatter_walk walk; + unsigned int len; + + if (assoclen < 0xff00) { + aadlen.l = cpu_to_be16(assoclen); + len = 2; + } else { + aadlen.l = cpu_to_be16(0xfffe); + put_unaligned_be32(assoclen, &aadlen.h); + len = 6; + } + + sm4_ce_crypt_block(ctx->rkey_enc, mac, mac); + crypto_xor(mac, (const u8 *)&aadlen, len); + + scatterwalk_start(&walk, req->src); + + do { + u32 n = scatterwalk_clamp(&walk, assoclen); + u8 *p, *ptr; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, assoclen); + } + + p = ptr = scatterwalk_map(&walk); + assoclen -= n; + scatterwalk_advance(&walk, n); + + while (n > 0) { + unsigned int l, nblocks; + + if (len == SM4_BLOCK_SIZE) { + if (n < SM4_BLOCK_SIZE) { + sm4_ce_crypt_block(ctx->rkey_enc, + mac, mac); + + len = 0; + } else { + nblocks = n / SM4_BLOCK_SIZE; + sm4_ce_cbcmac_update(ctx->rkey_enc, + mac, ptr, nblocks); + + ptr += nblocks * SM4_BLOCK_SIZE; + n %= SM4_BLOCK_SIZE; + + continue; + } + } + + l = min(n, SM4_BLOCK_SIZE - len); + if (l) { + crypto_xor(mac + len, ptr, l); + len += l; + ptr += l; + n -= l; + } + } + + scatterwalk_unmap(p); + scatterwalk_done(&walk, 0, assoclen); + } while (assoclen); +} + +static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk, + u32 *rkey_enc, u8 mac[], + void (*sm4_ce_ccm_crypt)(const u32 *rkey_enc, u8 *dst, + const u8 *src, u8 *iv, + unsigned int nbytes, u8 *mac)) +{ + u8 __aligned(8) ctr0[SM4_BLOCK_SIZE]; + int err; + + /* preserve the initial ctr0 for the TAG */ + memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE); + crypto_inc(walk->iv, SM4_BLOCK_SIZE); + + kernel_neon_begin(); + + if (req->assoclen) + ccm_calculate_auth_mac(req, mac); + + do { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; + const u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + + if (walk->nbytes == walk->total) + tail = 0; + + if (walk->nbytes - tail) + sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv, + walk->nbytes - tail, mac); + + if (walk->nbytes == walk->total) + sm4_ce_ccm_final(rkey_enc, ctr0, mac); + + kernel_neon_end(); + + if (walk->nbytes) { + err = skcipher_walk_done(walk, tail); + if (err) + return err; + if (walk->nbytes) + kernel_neon_begin(); + } + } while (walk->nbytes > 0); + + return 0; +} + +static int ccm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) mac[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = ccm_format_input(mac, req, req->cryptlen); + if (err) + return err; + + err = skcipher_walk_aead_encrypt(&walk, req, false); + if (err) + return err; + + err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_enc); + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int ccm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + unsigned int authsize = crypto_aead_authsize(aead); + struct sm4_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) mac[SM4_BLOCK_SIZE]; + u8 authtag[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = ccm_format_input(mac, req, req->cryptlen - authsize); + if (err) + return err; + + err = skcipher_walk_aead_decrypt(&walk, req, false); + if (err) + return err; + + err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_dec); + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(authtag, req->src, + req->assoclen + req->cryptlen - authsize, + authsize, 0); + + if (crypto_memneq(authtag, mac, authsize)) + return -EBADMSG; + + return 0; +} + +static struct aead_alg sm4_ccm_alg = { + .base = { + .cra_name = "ccm(sm4)", + .cra_driver_name = "ccm-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .ivsize = SM4_BLOCK_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .maxauthsize = SM4_BLOCK_SIZE, + .setkey = ccm_setkey, + .setauthsize = ccm_setauthsize, + .encrypt = ccm_encrypt, + .decrypt = ccm_decrypt, +}; + +static int __init sm4_ce_ccm_init(void) +{ + return crypto_register_aead(&sm4_ccm_alg); +} + +static void __exit sm4_ce_ccm_exit(void) +{ + crypto_unregister_aead(&sm4_ccm_alg); +} + +module_cpu_feature_match(SM4, sm4_ce_ccm_init); +module_exit(sm4_ce_ccm_exit); + +MODULE_DESCRIPTION("Synchronous SM4 in CCM mode using ARMv8 Crypto Extensions"); +MODULE_ALIAS_CRYPTO("ccm(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c index 76a34ef4abbbf198d1fe62e898c37da4c4d3e365..c31d76fb5a17707440b299d164c68a41e2308f57 100644 --- a/arch/arm64/crypto/sm4-ce-cipher-glue.c +++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c @@ -2,11 +2,11 @@ #include #include +#include #include #include #include #include -#include #include MODULE_ALIAS_CRYPTO("sm4"); diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S index 934e0f093279968362d7ddc2e4c1e7774c34077e..877b80c54a0d3aa8610a28256964847387585609 100644 --- a/arch/arm64/crypto/sm4-ce-core.S +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -10,10 +10,12 @@ #include #include +#include "sm4-ce-asm.h" .arch armv8-a+crypto -.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31 +.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \ + 20, 24, 25, 26, 27, 28, 29, 30, 31 .set .Lv\b\().4s, \b .endr @@ -33,174 +35,8 @@ #define RTMP3 v19 #define RIV v20 - -/* Helper macros. */ - -#define PREPARE \ - ld1 {v24.16b-v27.16b}, [x0], #64; \ - ld1 {v28.16b-v31.16b}, [x0]; - -#define SM4_CRYPT_BLK(b0) \ - rev32 b0.16b, b0.16b; \ - sm4e b0.4s, v24.4s; \ - sm4e b0.4s, v25.4s; \ - sm4e b0.4s, v26.4s; \ - sm4e b0.4s, v27.4s; \ - sm4e b0.4s, v28.4s; \ - sm4e b0.4s, v29.4s; \ - sm4e b0.4s, v30.4s; \ - sm4e b0.4s, v31.4s; \ - rev64 b0.4s, b0.4s; \ - ext b0.16b, b0.16b, b0.16b, #8; \ - rev32 b0.16b, b0.16b; - -#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - sm4e b0.4s, v24.4s; \ - sm4e b1.4s, v24.4s; \ - sm4e b2.4s, v24.4s; \ - sm4e b3.4s, v24.4s; \ - sm4e b0.4s, v25.4s; \ - sm4e b1.4s, v25.4s; \ - sm4e b2.4s, v25.4s; \ - sm4e b3.4s, v25.4s; \ - sm4e b0.4s, v26.4s; \ - sm4e b1.4s, v26.4s; \ - sm4e b2.4s, v26.4s; \ - sm4e b3.4s, v26.4s; \ - sm4e b0.4s, v27.4s; \ - sm4e b1.4s, v27.4s; \ - sm4e b2.4s, v27.4s; \ - sm4e b3.4s, v27.4s; \ - sm4e b0.4s, v28.4s; \ - sm4e b1.4s, v28.4s; \ - sm4e b2.4s, v28.4s; \ - sm4e b3.4s, v28.4s; \ - sm4e b0.4s, v29.4s; \ - sm4e b1.4s, v29.4s; \ - sm4e b2.4s, v29.4s; \ - sm4e b3.4s, v29.4s; \ - sm4e b0.4s, v30.4s; \ - sm4e b1.4s, v30.4s; \ - sm4e b2.4s, v30.4s; \ - sm4e b3.4s, v30.4s; \ - sm4e b0.4s, v31.4s; \ - sm4e b1.4s, v31.4s; \ - sm4e b2.4s, v31.4s; \ - sm4e b3.4s, v31.4s; \ - rev64 b0.4s, b0.4s; \ - rev64 b1.4s, b1.4s; \ - rev64 b2.4s, b2.4s; \ - rev64 b3.4s, b3.4s; \ - ext b0.16b, b0.16b, b0.16b, #8; \ - ext b1.16b, b1.16b, b1.16b, #8; \ - ext b2.16b, b2.16b, b2.16b, #8; \ - ext b3.16b, b3.16b, b3.16b, #8; \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; - -#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - rev32 b4.16b, b4.16b; \ - rev32 b5.16b, b5.16b; \ - rev32 b6.16b, b6.16b; \ - rev32 b7.16b, b7.16b; \ - sm4e b0.4s, v24.4s; \ - sm4e b1.4s, v24.4s; \ - sm4e b2.4s, v24.4s; \ - sm4e b3.4s, v24.4s; \ - sm4e b4.4s, v24.4s; \ - sm4e b5.4s, v24.4s; \ - sm4e b6.4s, v24.4s; \ - sm4e b7.4s, v24.4s; \ - sm4e b0.4s, v25.4s; \ - sm4e b1.4s, v25.4s; \ - sm4e b2.4s, v25.4s; \ - sm4e b3.4s, v25.4s; \ - sm4e b4.4s, v25.4s; \ - sm4e b5.4s, v25.4s; \ - sm4e b6.4s, v25.4s; \ - sm4e b7.4s, v25.4s; \ - sm4e b0.4s, v26.4s; \ - sm4e b1.4s, v26.4s; \ - sm4e b2.4s, v26.4s; \ - sm4e b3.4s, v26.4s; \ - sm4e b4.4s, v26.4s; \ - sm4e b5.4s, v26.4s; \ - sm4e b6.4s, v26.4s; \ - sm4e b7.4s, v26.4s; \ - sm4e b0.4s, v27.4s; \ - sm4e b1.4s, v27.4s; \ - sm4e b2.4s, v27.4s; \ - sm4e b3.4s, v27.4s; \ - sm4e b4.4s, v27.4s; \ - sm4e b5.4s, v27.4s; \ - sm4e b6.4s, v27.4s; \ - sm4e b7.4s, v27.4s; \ - sm4e b0.4s, v28.4s; \ - sm4e b1.4s, v28.4s; \ - sm4e b2.4s, v28.4s; \ - sm4e b3.4s, v28.4s; \ - sm4e b4.4s, v28.4s; \ - sm4e b5.4s, v28.4s; \ - sm4e b6.4s, v28.4s; \ - sm4e b7.4s, v28.4s; \ - sm4e b0.4s, v29.4s; \ - sm4e b1.4s, v29.4s; \ - sm4e b2.4s, v29.4s; \ - sm4e b3.4s, v29.4s; \ - sm4e b4.4s, v29.4s; \ - sm4e b5.4s, v29.4s; \ - sm4e b6.4s, v29.4s; \ - sm4e b7.4s, v29.4s; \ - sm4e b0.4s, v30.4s; \ - sm4e b1.4s, v30.4s; \ - sm4e b2.4s, v30.4s; \ - sm4e b3.4s, v30.4s; \ - sm4e b4.4s, v30.4s; \ - sm4e b5.4s, v30.4s; \ - sm4e b6.4s, v30.4s; \ - sm4e b7.4s, v30.4s; \ - sm4e b0.4s, v31.4s; \ - sm4e b1.4s, v31.4s; \ - sm4e b2.4s, v31.4s; \ - sm4e b3.4s, v31.4s; \ - sm4e b4.4s, v31.4s; \ - sm4e b5.4s, v31.4s; \ - sm4e b6.4s, v31.4s; \ - sm4e b7.4s, v31.4s; \ - rev64 b0.4s, b0.4s; \ - rev64 b1.4s, b1.4s; \ - rev64 b2.4s, b2.4s; \ - rev64 b3.4s, b3.4s; \ - rev64 b4.4s, b4.4s; \ - rev64 b5.4s, b5.4s; \ - rev64 b6.4s, b6.4s; \ - rev64 b7.4s, b7.4s; \ - ext b0.16b, b0.16b, b0.16b, #8; \ - ext b1.16b, b1.16b, b1.16b, #8; \ - ext b2.16b, b2.16b, b2.16b, #8; \ - ext b3.16b, b3.16b, b3.16b, #8; \ - ext b4.16b, b4.16b, b4.16b, #8; \ - ext b5.16b, b5.16b, b5.16b, #8; \ - ext b6.16b, b6.16b, b6.16b, #8; \ - ext b7.16b, b7.16b, b7.16b, #8; \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - rev32 b4.16b, b4.16b; \ - rev32 b5.16b, b5.16b; \ - rev32 b6.16b, b6.16b; \ - rev32 b7.16b, b7.16b; +#define RMAC v20 +#define RMASK v21 .align 3 @@ -231,32 +67,23 @@ SYM_FUNC_START(sm4_ce_expand_key) sm4ekey v6.4s, v5.4s, v30.4s; sm4ekey v7.4s, v6.4s, v31.4s; + adr_l x5, .Lbswap128_mask + ld1 {v24.16b}, [x5] + st1 {v0.16b-v3.16b}, [x1], #64; st1 {v4.16b-v7.16b}, [x1]; - rev64 v7.4s, v7.4s; - rev64 v6.4s, v6.4s; - rev64 v5.4s, v5.4s; - rev64 v4.4s, v4.4s; - rev64 v3.4s, v3.4s; - rev64 v2.4s, v2.4s; - rev64 v1.4s, v1.4s; - rev64 v0.4s, v0.4s; - ext v7.16b, v7.16b, v7.16b, #8; - ext v6.16b, v6.16b, v6.16b, #8; - ext v5.16b, v5.16b, v5.16b, #8; - ext v4.16b, v4.16b, v4.16b, #8; - ext v3.16b, v3.16b, v3.16b, #8; - ext v2.16b, v2.16b, v2.16b, #8; - ext v1.16b, v1.16b, v1.16b, #8; - ext v0.16b, v0.16b, v0.16b, #8; - st1 {v7.16b}, [x2], #16; - st1 {v6.16b}, [x2], #16; - st1 {v5.16b}, [x2], #16; - st1 {v4.16b}, [x2], #16; - st1 {v3.16b}, [x2], #16; - st1 {v2.16b}, [x2], #16; - st1 {v1.16b}, [x2], #16; - st1 {v0.16b}, [x2]; + + tbl v16.16b, {v7.16b}, v24.16b + tbl v17.16b, {v6.16b}, v24.16b + tbl v18.16b, {v5.16b}, v24.16b + tbl v19.16b, {v4.16b}, v24.16b + tbl v20.16b, {v3.16b}, v24.16b + tbl v21.16b, {v2.16b}, v24.16b + tbl v22.16b, {v1.16b}, v24.16b + tbl v23.16b, {v0.16b}, v24.16b + + st1 {v16.16b-v19.16b}, [x2], #64 + st1 {v20.16b-v23.16b}, [x2] ret; SYM_FUNC_END(sm4_ce_expand_key) @@ -268,7 +95,7 @@ SYM_FUNC_START(sm4_ce_crypt_block) * x1: dst * x2: src */ - PREPARE; + SM4_PREPARE(x0) ld1 {v0.16b}, [x2]; SM4_CRYPT_BLK(v0); @@ -285,7 +112,7 @@ SYM_FUNC_START(sm4_ce_crypt) * x2: src * w3: nblocks */ - PREPARE; + SM4_PREPARE(x0) .Lcrypt_loop_blk: sub w3, w3, #8; @@ -337,26 +164,50 @@ SYM_FUNC_START(sm4_ce_cbc_enc) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) + + ld1 {RIV.16b}, [x3] + +.Lcbc_enc_loop_4x: + cmp w4, #4 + blt .Lcbc_enc_loop_1x + + sub w4, w4, #4 - ld1 {RIV.16b}, [x3]; + ld1 {v0.16b-v3.16b}, [x2], #64 -.Lcbc_enc_loop: - sub w4, w4, #1; + eor v0.16b, v0.16b, RIV.16b + SM4_CRYPT_BLK(v0) + eor v1.16b, v1.16b, v0.16b + SM4_CRYPT_BLK(v1) + eor v2.16b, v2.16b, v1.16b + SM4_CRYPT_BLK(v2) + eor v3.16b, v3.16b, v2.16b + SM4_CRYPT_BLK(v3) - ld1 {RTMP0.16b}, [x2], #16; - eor RIV.16b, RIV.16b, RTMP0.16b; + st1 {v0.16b-v3.16b}, [x1], #64 + mov RIV.16b, v3.16b - SM4_CRYPT_BLK(RIV); + cbz w4, .Lcbc_enc_end + b .Lcbc_enc_loop_4x - st1 {RIV.16b}, [x1], #16; +.Lcbc_enc_loop_1x: + sub w4, w4, #1 - cbnz w4, .Lcbc_enc_loop; + ld1 {v0.16b}, [x2], #16 + eor RIV.16b, RIV.16b, v0.16b + SM4_CRYPT_BLK(RIV) + + st1 {RIV.16b}, [x1], #16 + + cbnz w4, .Lcbc_enc_loop_1x + +.Lcbc_enc_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cbc_enc) .align 3 @@ -368,81 +219,189 @@ SYM_FUNC_START(sm4_ce_cbc_dec) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ld1 {RIV.16b}, [x3]; + ld1 {RIV.16b}, [x3] -.Lcbc_loop_blk: - sub w4, w4, #8; - tbnz w4, #31, .Lcbc_tail8; +.Lcbc_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcbc_dec_4x - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b-v7.16b}, [x2]; + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + rev32 v8.16b, v0.16b + rev32 v9.16b, v1.16b + rev32 v10.16b, v2.16b + rev32 v11.16b, v3.16b + rev32 v12.16b, v4.16b + rev32 v13.16b, v5.16b + rev32 v14.16b, v6.16b + rev32 v15.16b, v7.16b - sub x2, x2, #64; - eor v0.16b, v0.16b, RIV.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v1.16b, v1.16b, RTMP0.16b; - eor v2.16b, v2.16b, RTMP1.16b; - eor v3.16b, v3.16b, RTMP2.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15) - eor v4.16b, v4.16b, RTMP3.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v5.16b, v5.16b, RTMP0.16b; - eor v6.16b, v6.16b, RTMP1.16b; - eor v7.16b, v7.16b, RTMP2.16b; + eor v8.16b, v8.16b, RIV.16b + eor v9.16b, v9.16b, v0.16b + eor v10.16b, v10.16b, v1.16b + eor v11.16b, v11.16b, v2.16b + eor v12.16b, v12.16b, v3.16b + eor v13.16b, v13.16b, v4.16b + eor v14.16b, v14.16b, v5.16b + eor v15.16b, v15.16b, v6.16b - mov RIV.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + st1 {v8.16b-v11.16b}, [x1], #64 + st1 {v12.16b-v15.16b}, [x1], #64 - cbz w4, .Lcbc_end; - b .Lcbc_loop_blk; + mov RIV.16b, v7.16b -.Lcbc_tail8: - add w4, w4, #8; - cmp w4, #4; - blt .Lcbc_tail4; + cbz w4, .Lcbc_dec_end + b .Lcbc_dec_loop_8x - sub w4, w4, #4; +.Lcbc_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcbc_dec_loop_1x - ld1 {v0.16b-v3.16b}, [x2]; + sub w4, w4, #4 - SM4_CRYPT_BLK4(v0, v1, v2, v3); + ld1 {v0.16b-v3.16b}, [x2], #64 - eor v0.16b, v0.16b, RIV.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v1.16b, v1.16b, RTMP0.16b; - eor v2.16b, v2.16b, RTMP1.16b; - eor v3.16b, v3.16b, RTMP2.16b; + rev32 v8.16b, v0.16b + rev32 v9.16b, v1.16b + rev32 v10.16b, v2.16b + rev32 v11.16b, v3.16b - mov RIV.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK4_BE(v8, v9, v10, v11) - cbz w4, .Lcbc_end; + eor v8.16b, v8.16b, RIV.16b + eor v9.16b, v9.16b, v0.16b + eor v10.16b, v10.16b, v1.16b + eor v11.16b, v11.16b, v2.16b -.Lcbc_tail4: - sub w4, w4, #1; + st1 {v8.16b-v11.16b}, [x1], #64 - ld1 {v0.16b}, [x2]; + mov RIV.16b, v3.16b - SM4_CRYPT_BLK(v0); + cbz w4, .Lcbc_dec_end - eor v0.16b, v0.16b, RIV.16b; - ld1 {RIV.16b}, [x2], #16; - st1 {v0.16b}, [x1], #16; +.Lcbc_dec_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 - cbnz w4, .Lcbc_tail4; + rev32 v8.16b, v0.16b -.Lcbc_end: + SM4_CRYPT_BLK_BE(v8) + + eor v8.16b, v8.16b, RIV.16b + st1 {v8.16b}, [x1], #16 + + mov RIV.16b, v0.16b + + cbnz w4, .Lcbc_dec_loop_1x + +.Lcbc_dec_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cbc_dec) +.align 3 +SYM_FUNC_START(sm4_ce_cbc_cts_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nbytes + */ + SM4_PREPARE(x0) + + sub w5, w4, #16 + uxtw x5, w5 + + ld1 {RIV.16b}, [x3] + + ld1 {v0.16b}, [x2] + eor RIV.16b, RIV.16b, v0.16b + SM4_CRYPT_BLK(RIV) + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + add x2, x2, x5 + ld1 {v1.16b}, [x2] + + /* create Cn from En-1 */ + tbl v0.16b, {RIV.16b}, v3.16b + /* padding Pn with zeros */ + tbl v1.16b, {v1.16b}, v4.16b + + eor v1.16b, v1.16b, RIV.16b + SM4_CRYPT_BLK(v1) + + /* overlapping stores */ + add x5, x1, x5 + st1 {v0.16b}, [x5] + st1 {v1.16b}, [x1] + + ret +SYM_FUNC_END(sm4_ce_cbc_cts_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_cbc_cts_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: iv (big endian, 128 bit) + * w4: nbytes + */ + SM4_PREPARE(x0) + + sub w5, w4, #16 + uxtw x5, w5 + + ld1 {RIV.16b}, [x3] + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + ld1 {v0.16b}, [x2], x5 + ld1 {v1.16b}, [x2] + + SM4_CRYPT_BLK(v0) + /* select the first Ln bytes of Xn to create Pn */ + tbl v2.16b, {v0.16b}, v3.16b + eor v2.16b, v2.16b, v1.16b + + /* overwrite the first Ln bytes with Cn to create En-1 */ + tbx v0.16b, {v1.16b}, v4.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, RIV.16b + + /* overlapping stores */ + add x5, x1, x5 + st1 {v2.16b}, [x5] + st1 {v0.16b}, [x1] + + ret +SYM_FUNC_END(sm4_ce_cbc_cts_dec) + .align 3 SYM_FUNC_START(sm4_ce_cfb_enc) /* input: @@ -452,25 +411,57 @@ SYM_FUNC_START(sm4_ce_cfb_enc) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) + + ld1 {RIV.16b}, [x3] + +.Lcfb_enc_loop_4x: + cmp w4, #4 + blt .Lcfb_enc_loop_1x + + sub w4, w4, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + rev32 v8.16b, RIV.16b + SM4_CRYPT_BLK_BE(v8) + eor v0.16b, v0.16b, v8.16b + + rev32 v8.16b, v0.16b + SM4_CRYPT_BLK_BE(v8) + eor v1.16b, v1.16b, v8.16b + + rev32 v8.16b, v1.16b + SM4_CRYPT_BLK_BE(v8) + eor v2.16b, v2.16b, v8.16b + + rev32 v8.16b, v2.16b + SM4_CRYPT_BLK_BE(v8) + eor v3.16b, v3.16b, v8.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + mov RIV.16b, v3.16b + + cbz w4, .Lcfb_enc_end + b .Lcfb_enc_loop_4x - ld1 {RIV.16b}, [x3]; +.Lcfb_enc_loop_1x: + sub w4, w4, #1 -.Lcfb_enc_loop: - sub w4, w4, #1; + ld1 {v0.16b}, [x2], #16 - SM4_CRYPT_BLK(RIV); + SM4_CRYPT_BLK(RIV) + eor RIV.16b, RIV.16b, v0.16b - ld1 {RTMP0.16b}, [x2], #16; - eor RIV.16b, RIV.16b, RTMP0.16b; - st1 {RIV.16b}, [x1], #16; + st1 {RIV.16b}, [x1], #16 - cbnz w4, .Lcfb_enc_loop; + cbnz w4, .Lcfb_enc_loop_1x +.Lcfb_enc_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cfb_enc) .align 3 @@ -482,79 +473,91 @@ SYM_FUNC_START(sm4_ce_cfb_dec) * x3: iv (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ld1 {v0.16b}, [x3]; + ld1 {RIV.16b}, [x3] -.Lcfb_loop_blk: - sub w4, w4, #8; - tbnz w4, #31, .Lcfb_tail8; +.Lcfb_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcfb_dec_4x - ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; - ld1 {v4.16b-v7.16b}, [x2]; + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + rev32 v8.16b, RIV.16b + rev32 v9.16b, v0.16b + rev32 v10.16b, v1.16b + rev32 v11.16b, v2.16b + rev32 v12.16b, v3.16b + rev32 v13.16b, v4.16b + rev32 v14.16b, v5.16b + rev32 v15.16b, v6.16b - sub x2, x2, #48; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15) - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + mov RIV.16b, v7.16b - mov v0.16b, RTMP3.16b; + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b - cbz w4, .Lcfb_end; - b .Lcfb_loop_blk; + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 -.Lcfb_tail8: - add w4, w4, #8; - cmp w4, #4; - blt .Lcfb_tail4; + cbz w4, .Lcfb_dec_end + b .Lcfb_dec_loop_8x - sub w4, w4, #4; +.Lcfb_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcfb_dec_loop_1x - ld1 {v1.16b, v2.16b, v3.16b}, [x2]; + sub w4, w4, #4 - SM4_CRYPT_BLK4(v0, v1, v2, v3); + ld1 {v0.16b-v3.16b}, [x2], #64 - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + rev32 v8.16b, RIV.16b + rev32 v9.16b, v0.16b + rev32 v10.16b, v1.16b + rev32 v11.16b, v2.16b - mov v0.16b, RTMP3.16b; + SM4_CRYPT_BLK4_BE(v8, v9, v10, v11) - cbz w4, .Lcfb_end; + mov RIV.16b, v3.16b -.Lcfb_tail4: - sub w4, w4, #1; + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b - SM4_CRYPT_BLK(v0); + st1 {v0.16b-v3.16b}, [x1], #64 - ld1 {RTMP0.16b}, [x2], #16; - eor v0.16b, v0.16b, RTMP0.16b; - st1 {v0.16b}, [x1], #16; + cbz w4, .Lcfb_dec_end + +.Lcfb_dec_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 + + SM4_CRYPT_BLK(RIV) - mov v0.16b, RTMP0.16b; + eor RIV.16b, RIV.16b, v0.16b + st1 {RIV.16b}, [x1], #16 - cbnz w4, .Lcfb_tail4; + mov RIV.16b, v0.16b -.Lcfb_end: + cbnz w4, .Lcfb_dec_loop_1x + +.Lcfb_dec_end: /* store new IV */ - st1 {v0.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; + ret SYM_FUNC_END(sm4_ce_cfb_dec) .align 3 @@ -566,95 +569,525 @@ SYM_FUNC_START(sm4_ce_ctr_enc) * x3: ctr (big endian, 128 bit) * w4: nblocks */ - PREPARE; + SM4_PREPARE(x0) - ldp x7, x8, [x3]; - rev x7, x7; - rev x8, x8; + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 -.Lctr_loop_blk: - sub w4, w4, #8; - tbnz w4, #31, .Lctr_tail8; +.Lctr_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lctr_4x -#define inc_le128(vctr) \ - mov vctr.d[1], x8; \ - mov vctr.d[0], x7; \ - adds x8, x8, #1; \ - adc x7, x7, xzr; \ - rev64 vctr.16b, vctr.16b; +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + rev64 vctr.16b, vctr.16b; \ + adc x7, x7, xzr; /* construct CTRs */ - inc_le128(v0); /* +0 */ - inc_le128(v1); /* +1 */ - inc_le128(v2); /* +2 */ - inc_le128(v3); /* +3 */ - inc_le128(v4); /* +4 */ - inc_le128(v5); /* +5 */ - inc_le128(v6); /* +6 */ - inc_le128(v7); /* +7 */ + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ + inc_le128(v4) /* +4 */ + inc_le128(v5) /* +5 */ + inc_le128(v6) /* +6 */ + inc_le128(v7) /* +7 */ + + ld1 {v8.16b-v11.16b}, [x2], #64 + ld1 {v12.16b-v15.16b}, [x2], #64 + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + cbz w4, .Lctr_end + b .Lctr_loop_8x + +.Lctr_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lctr_loop_1x + + sub w4, w4, #4 - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + /* construct CTRs */ + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + ld1 {v8.16b-v11.16b}, [x2], #64 - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b - cbz w4, .Lctr_end; - b .Lctr_loop_blk; + st1 {v0.16b-v3.16b}, [x1], #64 -.Lctr_tail8: - add w4, w4, #8; - cmp w4, #4; - blt .Lctr_tail4; + cbz w4, .Lctr_end - sub w4, w4, #4; +.Lctr_loop_1x: + sub w4, w4, #1 /* construct CTRs */ - inc_le128(v0); /* +0 */ - inc_le128(v1); /* +1 */ - inc_le128(v2); /* +2 */ - inc_le128(v3); /* +3 */ + inc_le128(v0) - SM4_CRYPT_BLK4(v0, v1, v2, v3); + ld1 {v8.16b}, [x2], #16 - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + SM4_CRYPT_BLK(v0) - cbz w4, .Lctr_end; + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x1], #16 -.Lctr_tail4: - sub w4, w4, #1; + cbnz w4, .Lctr_loop_1x - /* construct CTRs */ - inc_le128(v0); +.Lctr_end: + /* store new CTR */ + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] - SM4_CRYPT_BLK(v0); + ret +SYM_FUNC_END(sm4_ce_ctr_enc) - ld1 {RTMP0.16b}, [x2], #16; - eor v0.16b, v0.16b, RTMP0.16b; - st1 {v0.16b}, [x1], #16; - cbnz w4, .Lctr_tail4; +#define tweak_next(vt, vin, RTMP) \ + sshr RTMP.2d, vin.2d, #63; \ + and RTMP.16b, RTMP.16b, RMASK.16b; \ + add vt.2d, vin.2d, vin.2d; \ + ext RTMP.16b, RTMP.16b, RTMP.16b, #8; \ + eor vt.16b, vt.16b, RTMP.16b; -.Lctr_end: - /* store new CTR */ - rev x7, x7; - rev x8, x8; - stp x7, x8, [x3]; +.align 3 +SYM_FUNC_START(sm4_ce_xts_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: tweak (big endian, 128 bit) + * w4: nbytes + * x5: round key array for IV + */ + ld1 {v8.16b}, [x3] - ret; -SYM_FUNC_END(sm4_ce_ctr_enc) + cbz x5, .Lxts_enc_nofirst + + SM4_PREPARE(x5) + + /* Generate first tweak */ + SM4_CRYPT_BLK(v8) + +.Lxts_enc_nofirst: + SM4_PREPARE(x0) + + ands w5, w4, #15 + lsr w4, w4, #4 + sub w6, w4, #1 + csel w4, w4, w6, eq + uxtw x5, w5 + + movi RMASK.2s, #0x1 + movi RTMP0.2s, #0x87 + uzp1 RMASK.4s, RMASK.4s, RTMP0.4s + + cbz w4, .Lxts_enc_cts + +.Lxts_enc_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lxts_enc_4x + + tweak_next( v9, v8, RTMP0) + tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + tweak_next(v12, v11, RTMP3) + tweak_next(v13, v12, RTMP0) + tweak_next(v14, v13, RTMP1) + tweak_next(v15, v14, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + tweak_next(v8, v15, RTMP3) + + cbz w4, .Lxts_enc_cts + b .Lxts_enc_loop_8x + +.Lxts_enc_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lxts_enc_loop_1x + + sub w4, w4, #4 + + tweak_next( v9, v8, RTMP0) + tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + st1 {v0.16b-v3.16b}, [x1], #64 + + tweak_next(v8, v11, RTMP3) + + cbz w4, .Lxts_enc_cts + +.Lxts_enc_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 + eor v0.16b, v0.16b, v8.16b + + SM4_CRYPT_BLK(v0) + + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x1], #16 + + tweak_next(v8, v8, RTMP0) + + cbnz w4, .Lxts_enc_loop_1x + +.Lxts_enc_cts: + cbz x5, .Lxts_enc_end + + /* cipher text stealing */ + + tweak_next(v9, v8, RTMP0) + ld1 {v0.16b}, [x2] + eor v0.16b, v0.16b, v8.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v8.16b + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + add x2, x2, x5 + ld1 {v1.16b}, [x2] + + /* create Cn from En-1 */ + tbl v2.16b, {v0.16b}, v3.16b + /* padding Pn with En-1 at the end */ + tbx v0.16b, {v1.16b}, v4.16b + + eor v0.16b, v0.16b, v9.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v9.16b + + + /* overlapping stores */ + add x5, x1, x5 + st1 {v2.16b}, [x5] + st1 {v0.16b}, [x1] + + b .Lxts_enc_ret + +.Lxts_enc_end: + /* store new tweak */ + st1 {v8.16b}, [x3] + +.Lxts_enc_ret: + ret +SYM_FUNC_END(sm4_ce_xts_enc) + +.align 3 +SYM_FUNC_START(sm4_ce_xts_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: tweak (big endian, 128 bit) + * w4: nbytes + * x5: round key array for IV + */ + ld1 {v8.16b}, [x3] + + cbz x5, .Lxts_dec_nofirst + + SM4_PREPARE(x5) + + /* Generate first tweak */ + SM4_CRYPT_BLK(v8) + +.Lxts_dec_nofirst: + SM4_PREPARE(x0) + + ands w5, w4, #15 + lsr w4, w4, #4 + sub w6, w4, #1 + csel w4, w4, w6, eq + uxtw x5, w5 + + movi RMASK.2s, #0x1 + movi RTMP0.2s, #0x87 + uzp1 RMASK.4s, RMASK.4s, RTMP0.4s + + cbz w4, .Lxts_dec_cts + +.Lxts_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lxts_dec_4x + + tweak_next( v9, v8, RTMP0) + tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + tweak_next(v12, v11, RTMP3) + tweak_next(v13, v12, RTMP0) + tweak_next(v14, v13, RTMP1) + tweak_next(v15, v14, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + ld1 {v4.16b-v7.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + eor v4.16b, v4.16b, v12.16b + eor v5.16b, v5.16b, v13.16b + eor v6.16b, v6.16b, v14.16b + eor v7.16b, v7.16b, v15.16b + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + tweak_next(v8, v15, RTMP3) + + cbz w4, .Lxts_dec_cts + b .Lxts_dec_loop_8x + +.Lxts_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lxts_dec_loop_1x + + sub w4, w4, #4 + + tweak_next( v9, v8, RTMP0) + tweak_next(v10, v9, RTMP1) + tweak_next(v11, v10, RTMP2) + + ld1 {v0.16b-v3.16b}, [x2], #64 + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v8.16b + eor v1.16b, v1.16b, v9.16b + eor v2.16b, v2.16b, v10.16b + eor v3.16b, v3.16b, v11.16b + st1 {v0.16b-v3.16b}, [x1], #64 + + tweak_next(v8, v11, RTMP3) + + cbz w4, .Lxts_dec_cts + +.Lxts_dec_loop_1x: + sub w4, w4, #1 + + ld1 {v0.16b}, [x2], #16 + eor v0.16b, v0.16b, v8.16b + + SM4_CRYPT_BLK(v0) + + eor v0.16b, v0.16b, v8.16b + st1 {v0.16b}, [x1], #16 + + tweak_next(v8, v8, RTMP0) + + cbnz w4, .Lxts_dec_loop_1x + +.Lxts_dec_cts: + cbz x5, .Lxts_dec_end + + /* cipher text stealing */ + + tweak_next(v9, v8, RTMP0) + ld1 {v0.16b}, [x2] + eor v0.16b, v0.16b, v9.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v9.16b + + /* load permute table */ + adr_l x6, .Lcts_permute_table + add x7, x6, #32 + add x6, x6, x5 + sub x7, x7, x5 + ld1 {v3.16b}, [x6] + ld1 {v4.16b}, [x7] + + /* overlapping loads */ + add x2, x2, x5 + ld1 {v1.16b}, [x2] + + /* create Cn from En-1 */ + tbl v2.16b, {v0.16b}, v3.16b + /* padding Pn with En-1 at the end */ + tbx v0.16b, {v1.16b}, v4.16b + + eor v0.16b, v0.16b, v8.16b + SM4_CRYPT_BLK(v0) + eor v0.16b, v0.16b, v8.16b + + + /* overlapping stores */ + add x5, x1, x5 + st1 {v2.16b}, [x5] + st1 {v0.16b}, [x1] + + b .Lxts_dec_ret + +.Lxts_dec_end: + /* store new tweak */ + st1 {v8.16b}, [x3] + +.Lxts_dec_ret: + ret +SYM_FUNC_END(sm4_ce_xts_dec) + +.align 3 +SYM_FUNC_START(sm4_ce_mac_update) + /* input: + * x0: round key array, CTX + * x1: digest + * x2: src + * w3: nblocks + * w4: enc_before + * w5: enc_after + */ + SM4_PREPARE(x0) + + ld1 {RMAC.16b}, [x1] + + cbz w4, .Lmac_update + + SM4_CRYPT_BLK(RMAC) + +.Lmac_update: + cbz w3, .Lmac_ret + + sub w6, w3, #1 + cmp w5, wzr + csel w3, w3, w6, ne + + cbz w3, .Lmac_end + +.Lmac_loop_4x: + cmp w3, #4 + blt .Lmac_loop_1x + + sub w3, w3, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v1.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v2.16b + SM4_CRYPT_BLK(RMAC) + eor RMAC.16b, RMAC.16b, v3.16b + SM4_CRYPT_BLK(RMAC) + + cbz w3, .Lmac_end + b .Lmac_loop_4x + +.Lmac_loop_1x: + sub w3, w3, #1 + + ld1 {v0.16b}, [x2], #16 + + eor RMAC.16b, RMAC.16b, v0.16b + SM4_CRYPT_BLK(RMAC) + + cbnz w3, .Lmac_loop_1x + + +.Lmac_end: + cbnz w5, .Lmac_ret + + ld1 {v0.16b}, [x2], #16 + eor RMAC.16b, RMAC.16b, v0.16b + +.Lmac_ret: + st1 {RMAC.16b}, [x1] + ret +SYM_FUNC_END(sm4_ce_mac_update) + + + .section ".rodata", "a" + .align 4 +.Lbswap128_mask: + .byte 0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b + .byte 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03 + +.Lcts_permute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S new file mode 100644 index 0000000000000000000000000000000000000000..7aa3ec18a28912bc13d74c6b187b55a5b2d2cf2e --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-gcm-core.S @@ -0,0 +1,741 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2016 Jussi Kivilinna + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include "sm4-ce-asm.h" + +.arch armv8-a+crypto + +.irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31 + .set .Lv\b\().4s, \b +.endr + +.macro sm4e, vd, vn + .inst 0xcec08400 | (.L\vn << 5) | .L\vd +.endm + +/* Register macros */ + +/* Used for both encryption and decryption */ +#define RHASH v21 +#define RRCONST v22 +#define RZERO v23 + +/* Helper macros. */ + +/* + * input: m0, m1 + * output: r0:r1 (low 128-bits in r0, high in r1) + */ +#define PMUL_128x128(r0, r1, m0, m1, T0, T1) \ + ext T0.16b, m1.16b, m1.16b, #8; \ + pmull r0.1q, m0.1d, m1.1d; \ + pmull T1.1q, m0.1d, T0.1d; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + eor T0.16b, T0.16b, T1.16b; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r1.16b, r1.16b, T0.16b; + +#define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1, \ + r2, r3, m2, m3, T2, T3, \ + r4, r5, m4, m5, T4, T5, \ + r6, r7, m6, m7, T6, T7) \ + ext T0.16b, m1.16b, m1.16b, #8; \ + ext T2.16b, m3.16b, m3.16b, #8; \ + ext T4.16b, m5.16b, m5.16b, #8; \ + ext T6.16b, m7.16b, m7.16b, #8; \ + pmull r0.1q, m0.1d, m1.1d; \ + pmull r2.1q, m2.1d, m3.1d; \ + pmull r4.1q, m4.1d, m5.1d; \ + pmull r6.1q, m6.1d, m7.1d; \ + pmull T1.1q, m0.1d, T0.1d; \ + pmull T3.1q, m2.1d, T2.1d; \ + pmull T5.1q, m4.1d, T4.1d; \ + pmull T7.1q, m6.1d, T6.1d; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + pmull2 T2.1q, m2.2d, T2.2d; \ + pmull2 T4.1q, m4.2d, T4.2d; \ + pmull2 T6.1q, m6.2d, T6.2d; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + pmull2 r3.1q, m2.2d, m3.2d; \ + pmull2 r5.1q, m4.2d, m5.2d; \ + pmull2 r7.1q, m6.2d, m7.2d; \ + eor T0.16b, T0.16b, T1.16b; \ + eor T2.16b, T2.16b, T3.16b; \ + eor T4.16b, T4.16b, T5.16b; \ + eor T6.16b, T6.16b, T7.16b; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + ext T3.16b, RZERO.16b, T2.16b, #8; \ + ext T5.16b, RZERO.16b, T4.16b, #8; \ + ext T7.16b, RZERO.16b, T6.16b, #8; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + ext T2.16b, T2.16b, RZERO.16b, #8; \ + ext T4.16b, T4.16b, RZERO.16b, #8; \ + ext T6.16b, T6.16b, RZERO.16b, #8; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r2.16b, r2.16b, T3.16b; \ + eor r4.16b, r4.16b, T5.16b; \ + eor r6.16b, r6.16b, T7.16b; \ + eor r1.16b, r1.16b, T0.16b; \ + eor r3.16b, r3.16b, T2.16b; \ + eor r5.16b, r5.16b, T4.16b; \ + eor r7.16b, r7.16b, T6.16b; + +/* + * input: r0:r1 (low 128-bits in r0, high in r1) + * output: a + */ +#define REDUCTION(a, r0, r1, rconst, T0, T1) \ + pmull2 T0.1q, r1.2d, rconst.2d; \ + ext T1.16b, T0.16b, RZERO.16b, #8; \ + ext T0.16b, RZERO.16b, T0.16b, #8; \ + eor r1.16b, r1.16b, T1.16b; \ + eor r0.16b, r0.16b, T0.16b; \ + pmull T0.1q, r1.1d, rconst.1d; \ + eor a.16b, r0.16b, T0.16b; + +#define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1, m0, m1, T0, T1) \ + rev32 b0.16b, b0.16b; \ + ext T0.16b, m1.16b, m1.16b, #8; \ + sm4e b0.4s, v24.4s; \ + pmull r0.1q, m0.1d, m1.1d; \ + sm4e b0.4s, v25.4s; \ + pmull T1.1q, m0.1d, T0.1d; \ + sm4e b0.4s, v26.4s; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + sm4e b0.4s, v27.4s; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + sm4e b0.4s, v28.4s; \ + eor T0.16b, T0.16b, T1.16b; \ + sm4e b0.4s, v29.4s; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + sm4e b0.4s, v30.4s; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + sm4e b0.4s, v31.4s; \ + eor r0.16b, r0.16b, T1.16b; \ + rev64 b0.4s, b0.4s; \ + eor r1.16b, r1.16b, T0.16b; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + rev32 b0.16b, b0.16b; + +#define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2, \ + r0, r1, m0, m1, T0, T1, \ + r2, r3, m2, m3, T2, T3, \ + r4, r5, m4, m5, T4, T5) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + ext T0.16b, m1.16b, m1.16b, #8; \ + ext T2.16b, m3.16b, m3.16b, #8; \ + ext T4.16b, m5.16b, m5.16b, #8; \ + sm4e b0.4s, v24.4s; \ + sm4e b1.4s, v24.4s; \ + sm4e b2.4s, v24.4s; \ + pmull r0.1q, m0.1d, m1.1d; \ + pmull r2.1q, m2.1d, m3.1d; \ + pmull r4.1q, m4.1d, m5.1d; \ + sm4e b0.4s, v25.4s; \ + sm4e b1.4s, v25.4s; \ + sm4e b2.4s, v25.4s; \ + pmull T1.1q, m0.1d, T0.1d; \ + pmull T3.1q, m2.1d, T2.1d; \ + pmull T5.1q, m4.1d, T4.1d; \ + sm4e b0.4s, v26.4s; \ + sm4e b1.4s, v26.4s; \ + sm4e b2.4s, v26.4s; \ + pmull2 T0.1q, m0.2d, T0.2d; \ + pmull2 T2.1q, m2.2d, T2.2d; \ + pmull2 T4.1q, m4.2d, T4.2d; \ + sm4e b0.4s, v27.4s; \ + sm4e b1.4s, v27.4s; \ + sm4e b2.4s, v27.4s; \ + pmull2 r1.1q, m0.2d, m1.2d; \ + pmull2 r3.1q, m2.2d, m3.2d; \ + pmull2 r5.1q, m4.2d, m5.2d; \ + sm4e b0.4s, v28.4s; \ + sm4e b1.4s, v28.4s; \ + sm4e b2.4s, v28.4s; \ + eor T0.16b, T0.16b, T1.16b; \ + eor T2.16b, T2.16b, T3.16b; \ + eor T4.16b, T4.16b, T5.16b; \ + sm4e b0.4s, v29.4s; \ + sm4e b1.4s, v29.4s; \ + sm4e b2.4s, v29.4s; \ + ext T1.16b, RZERO.16b, T0.16b, #8; \ + ext T3.16b, RZERO.16b, T2.16b, #8; \ + ext T5.16b, RZERO.16b, T4.16b, #8; \ + sm4e b0.4s, v30.4s; \ + sm4e b1.4s, v30.4s; \ + sm4e b2.4s, v30.4s; \ + ext T0.16b, T0.16b, RZERO.16b, #8; \ + ext T2.16b, T2.16b, RZERO.16b, #8; \ + ext T4.16b, T4.16b, RZERO.16b, #8; \ + sm4e b0.4s, v31.4s; \ + sm4e b1.4s, v31.4s; \ + sm4e b2.4s, v31.4s; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r2.16b, r2.16b, T3.16b; \ + eor r4.16b, r4.16b, T5.16b; \ + rev64 b0.4s, b0.4s; \ + rev64 b1.4s, b1.4s; \ + rev64 b2.4s, b2.4s; \ + eor r1.16b, r1.16b, T0.16b; \ + eor r3.16b, r3.16b, T2.16b; \ + eor r5.16b, r5.16b, T4.16b; \ + ext b0.16b, b0.16b, b0.16b, #8; \ + ext b1.16b, b1.16b, b1.16b, #8; \ + ext b2.16b, b2.16b, b2.16b, #8; \ + eor r0.16b, r0.16b, r2.16b; \ + eor r1.16b, r1.16b, r3.16b; \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + eor r0.16b, r0.16b, r4.16b; \ + eor r1.16b, r1.16b, r5.16b; + +#define inc32_le128(vctr) \ + mov vctr.d[1], x9; \ + add w6, w9, #1; \ + mov vctr.d[0], x8; \ + bfi x9, x6, #0, #32; \ + rev64 vctr.16b, vctr.16b; + +#define GTAG_HASH_LENGTHS(vctr0, vlen) \ + ld1 {vlen.16b}, [x7]; \ + /* construct CTR0 */ \ + /* the lower 32-bits of initial IV is always be32(1) */ \ + mov x6, #0x1; \ + bfi x9, x6, #0, #32; \ + mov vctr0.d[0], x8; \ + mov vctr0.d[1], x9; \ + rbit vlen.16b, vlen.16b; \ + rev64 vctr0.16b, vctr0.16b; \ + /* authtag = GCTR(CTR0, GHASH) */ \ + eor RHASH.16b, RHASH.16b, vlen.16b; \ + SM4_CRYPT_PMUL_128x128_BLK(vctr0, RR0, RR1, RHASH, RH1, \ + RTMP0, RTMP1); \ + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3); \ + rbit RHASH.16b, RHASH.16b; \ + eor RHASH.16b, RHASH.16b, vctr0.16b; + + +/* Register macros for encrypt and ghash */ + +/* can be the same as input v0-v3 */ +#define RR1 v0 +#define RR3 v1 +#define RR5 v2 +#define RR7 v3 + +#define RR0 v4 +#define RR2 v5 +#define RR4 v6 +#define RR6 v7 + +#define RTMP0 v8 +#define RTMP1 v9 +#define RTMP2 v10 +#define RTMP3 v11 +#define RTMP4 v12 +#define RTMP5 v13 +#define RTMP6 v14 +#define RTMP7 v15 + +#define RH1 v16 +#define RH2 v17 +#define RH3 v18 +#define RH4 v19 + +.align 3 +SYM_FUNC_START(sm4_ce_pmull_ghash_setup) + /* input: + * x0: round key array, CTX + * x1: ghash table + */ + SM4_PREPARE(x0) + + adr_l x2, .Lghash_rconst + ld1r {RRCONST.2d}, [x2] + + eor RZERO.16b, RZERO.16b, RZERO.16b + + /* H = E(K, 0^128) */ + rev32 v0.16b, RZERO.16b + SM4_CRYPT_BLK_BE(v0) + + /* H ^ 1 */ + rbit RH1.16b, v0.16b + + /* H ^ 2 */ + PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0, RTMP1) + REDUCTION(RH2, RR0, RR1, RRCONST, RTMP2, RTMP3) + + /* H ^ 3 */ + PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0, RTMP1) + REDUCTION(RH3, RR0, RR1, RRCONST, RTMP2, RTMP3) + + /* H ^ 4 */ + PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0, RTMP1) + REDUCTION(RH4, RR0, RR1, RRCONST, RTMP2, RTMP3) + + st1 {RH1.16b-RH4.16b}, [x1] + + ret +SYM_FUNC_END(sm4_ce_pmull_ghash_setup) + +.align 3 +SYM_FUNC_START(pmull_ghash_update) + /* input: + * x0: ghash table + * x1: ghash result + * x2: src + * w3: nblocks + */ + ld1 {RH1.16b-RH4.16b}, [x0] + + ld1 {RHASH.16b}, [x1] + rbit RHASH.16b, RHASH.16b + + adr_l x4, .Lghash_rconst + ld1r {RRCONST.2d}, [x4] + + eor RZERO.16b, RZERO.16b, RZERO.16b + +.Lghash_loop_4x: + cmp w3, #4 + blt .Lghash_loop_1x + + sub w3, w3, #4 + + ld1 {v0.16b-v3.16b}, [x2], #64 + + rbit v0.16b, v0.16b + rbit v1.16b, v1.16b + rbit v2.16b, v2.16b + rbit v3.16b, v3.16b + + /* + * (in0 ^ HASH) * H^4 => rr0:rr1 + * (in1) * H^3 => rr2:rr3 + * (in2) * H^2 => rr4:rr5 + * (in3) * H^1 => rr6:rr7 + */ + eor RHASH.16b, RHASH.16b, v0.16b + + PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1, + RR2, RR3, v1, RH3, RTMP2, RTMP3, + RR4, RR5, v2, RH2, RTMP4, RTMP5, + RR6, RR7, v3, RH1, RTMP6, RTMP7) + + eor RR0.16b, RR0.16b, RR2.16b + eor RR1.16b, RR1.16b, RR3.16b + eor RR0.16b, RR0.16b, RR4.16b + eor RR1.16b, RR1.16b, RR5.16b + eor RR0.16b, RR0.16b, RR6.16b + eor RR1.16b, RR1.16b, RR7.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1) + + cbz w3, .Lghash_end + b .Lghash_loop_4x + +.Lghash_loop_1x: + sub w3, w3, #1 + + ld1 {v0.16b}, [x2], #16 + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + + cbnz w3, .Lghash_loop_1x + +.Lghash_end: + rbit RHASH.16b, RHASH.16b + st1 {RHASH.2d}, [x1] + + ret +SYM_FUNC_END(pmull_ghash_update) + +.align 3 +SYM_FUNC_START(sm4_ce_pmull_gcm_enc) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: ghash result + * x6: ghash table + * x7: lengths (only for last block) + */ + SM4_PREPARE(x0) + + ldp x8, x9, [x3] + rev x8, x8 + rev x9, x9 + + ld1 {RH1.16b-RH4.16b}, [x6] + + ld1 {RHASH.16b}, [x5] + rbit RHASH.16b, RHASH.16b + + adr_l x6, .Lghash_rconst + ld1r {RRCONST.2d}, [x6] + + eor RZERO.16b, RZERO.16b, RZERO.16b + + cbz w4, .Lgcm_enc_hash_len + +.Lgcm_enc_loop_4x: + cmp w4, #(4 * 16) + blt .Lgcm_enc_loop_1x + + sub w4, w4, #(4 * 16) + + /* construct CTRs */ + inc32_le128(v0) /* +0 */ + inc32_le128(v1) /* +1 */ + inc32_le128(v2) /* +2 */ + inc32_le128(v3) /* +3 */ + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, RTMP0.16b + eor v1.16b, v1.16b, RTMP1.16b + eor v2.16b, v2.16b, RTMP2.16b + eor v3.16b, v3.16b, RTMP3.16b + st1 {v0.16b-v3.16b}, [x1], #64 + + /* ghash update */ + + rbit v0.16b, v0.16b + rbit v1.16b, v1.16b + rbit v2.16b, v2.16b + rbit v3.16b, v3.16b + + /* + * (in0 ^ HASH) * H^4 => rr0:rr1 + * (in1) * H^3 => rr2:rr3 + * (in2) * H^2 => rr4:rr5 + * (in3) * H^1 => rr6:rr7 + */ + eor RHASH.16b, RHASH.16b, v0.16b + + PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1, + RR2, RR3, v1, RH3, RTMP2, RTMP3, + RR4, RR5, v2, RH2, RTMP4, RTMP5, + RR6, RR7, v3, RH1, RTMP6, RTMP7) + + eor RR0.16b, RR0.16b, RR2.16b + eor RR1.16b, RR1.16b, RR3.16b + eor RR0.16b, RR0.16b, RR4.16b + eor RR1.16b, RR1.16b, RR5.16b + eor RR0.16b, RR0.16b, RR6.16b + eor RR1.16b, RR1.16b, RR7.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1) + + cbz w4, .Lgcm_enc_hash_len + b .Lgcm_enc_loop_4x + +.Lgcm_enc_loop_1x: + cmp w4, #16 + blt .Lgcm_enc_tail + + sub w4, w4, #16 + + /* construct CTRs */ + inc32_le128(v0) + + ld1 {RTMP0.16b}, [x2], #16 + + SM4_CRYPT_BLK(v0) + + eor v0.16b, v0.16b, RTMP0.16b + st1 {v0.16b}, [x1], #16 + + /* ghash update */ + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + + cbz w4, .Lgcm_enc_hash_len + b .Lgcm_enc_loop_1x + +.Lgcm_enc_tail: + /* construct CTRs */ + inc32_le128(v0) + SM4_CRYPT_BLK(v0) + + /* load permute table */ + adr_l x0, .Lcts_permute_table + add x0, x0, #32 + sub x0, x0, w4, uxtw + ld1 {v3.16b}, [x0] + +.Lgcm_enc_tail_loop: + /* do encrypt */ + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w6, v0.b[0] /* get top crypted byte */ + eor w6, w6, w0 /* w6 = CTR ^ input */ + strb w6, [x1], #1 /* store out byte */ + + /* shift right out one byte */ + ext v0.16b, v0.16b, v0.16b, #1 + /* the last ciphertext is placed in high bytes */ + ins v0.b[15], w6 + + subs w4, w4, #1 + bne .Lgcm_enc_tail_loop + + /* padding last block with zeros */ + tbl v0.16b, {v0.16b}, v3.16b + + /* ghash update */ + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + +.Lgcm_enc_hash_len: + cbz x7, .Lgcm_enc_end + + GTAG_HASH_LENGTHS(v1, v3) + + b .Lgcm_enc_ret + +.Lgcm_enc_end: + /* store new CTR */ + rev x8, x8 + rev x9, x9 + stp x8, x9, [x3] + + rbit RHASH.16b, RHASH.16b + +.Lgcm_enc_ret: + /* store new MAC */ + st1 {RHASH.2d}, [x5] + + ret +SYM_FUNC_END(sm4_ce_pmull_gcm_enc) + +#undef RR1 +#undef RR3 +#undef RR5 +#undef RR7 +#undef RR0 +#undef RR2 +#undef RR4 +#undef RR6 +#undef RTMP0 +#undef RTMP1 +#undef RTMP2 +#undef RTMP3 +#undef RTMP4 +#undef RTMP5 +#undef RTMP6 +#undef RTMP7 +#undef RH1 +#undef RH2 +#undef RH3 +#undef RH4 + + +/* Register macros for decrypt */ + +/* v0-v2 for building CTRs, v3-v5 for saving inputs */ + +#define RR1 v6 +#define RR3 v7 +#define RR5 v8 + +#define RR0 v9 +#define RR2 v10 +#define RR4 v11 + +#define RTMP0 v12 +#define RTMP1 v13 +#define RTMP2 v14 +#define RTMP3 v15 +#define RTMP4 v16 +#define RTMP5 v17 + +#define RH1 v18 +#define RH2 v19 +#define RH3 v20 + +.align 3 +SYM_FUNC_START(sm4_ce_pmull_gcm_dec) + /* input: + * x0: round key array, CTX + * x1: dst + * x2: src + * x3: ctr (big endian, 128 bit) + * w4: nbytes + * x5: ghash result + * x6: ghash table + * x7: lengths (only for last block) + */ + SM4_PREPARE(x0) + + ldp x8, x9, [x3] + rev x8, x8 + rev x9, x9 + + ld1 {RH1.16b-RH3.16b}, [x6] + + ld1 {RHASH.16b}, [x5] + rbit RHASH.16b, RHASH.16b + + adr_l x6, .Lghash_rconst + ld1r {RRCONST.2d}, [x6] + + eor RZERO.16b, RZERO.16b, RZERO.16b + + cbz w4, .Lgcm_dec_hash_len + +.Lgcm_dec_loop_3x: + cmp w4, #(3 * 16) + blt .Lgcm_dec_loop_1x + + sub w4, w4, #(3 * 16) + + ld1 {v3.16b-v5.16b}, [x2], #(3 * 16) + + /* construct CTRs */ + inc32_le128(v0) /* +0 */ + rbit v6.16b, v3.16b + inc32_le128(v1) /* +1 */ + rbit v7.16b, v4.16b + inc32_le128(v2) /* +2 */ + rbit v8.16b, v5.16b + + eor RHASH.16b, RHASH.16b, v6.16b + + /* decrypt & ghash update */ + SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2, + RR0, RR1, RHASH, RH3, RTMP0, RTMP1, + RR2, RR3, v7, RH2, RTMP2, RTMP3, + RR4, RR5, v8, RH1, RTMP4, RTMP5) + + eor v0.16b, v0.16b, v3.16b + eor v1.16b, v1.16b, v4.16b + eor v2.16b, v2.16b, v5.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1) + + st1 {v0.16b-v2.16b}, [x1], #(3 * 16) + + cbz w4, .Lgcm_dec_hash_len + b .Lgcm_dec_loop_3x + +.Lgcm_dec_loop_1x: + cmp w4, #16 + blt .Lgcm_dec_tail + + sub w4, w4, #16 + + ld1 {v3.16b}, [x2], #16 + + /* construct CTRs */ + inc32_le128(v0) + rbit v6.16b, v3.16b + + eor RHASH.16b, RHASH.16b, v6.16b + + SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + + eor v0.16b, v0.16b, v3.16b + + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + + st1 {v0.16b}, [x1], #16 + + cbz w4, .Lgcm_dec_hash_len + b .Lgcm_dec_loop_1x + +.Lgcm_dec_tail: + /* construct CTRs */ + inc32_le128(v0) + SM4_CRYPT_BLK(v0) + + /* load permute table */ + adr_l x0, .Lcts_permute_table + add x0, x0, #32 + sub x0, x0, w4, uxtw + ld1 {v3.16b}, [x0] + +.Lgcm_dec_tail_loop: + /* do decrypt */ + ldrb w0, [x2], #1 /* get 1 byte from input */ + umov w6, v0.b[0] /* get top crypted byte */ + eor w6, w6, w0 /* w6 = CTR ^ input */ + strb w6, [x1], #1 /* store out byte */ + + /* shift right out one byte */ + ext v0.16b, v0.16b, v0.16b, #1 + /* the last ciphertext is placed in high bytes */ + ins v0.b[15], w0 + + subs w4, w4, #1 + bne .Lgcm_dec_tail_loop + + /* padding last block with zeros */ + tbl v0.16b, {v0.16b}, v3.16b + + /* ghash update */ + rbit v0.16b, v0.16b + eor RHASH.16b, RHASH.16b, v0.16b + PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1) + REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3) + +.Lgcm_dec_hash_len: + cbz x7, .Lgcm_dec_end + + GTAG_HASH_LENGTHS(v1, v3) + + b .Lgcm_dec_ret + +.Lgcm_dec_end: + /* store new CTR */ + rev x8, x8 + rev x9, x9 + stp x8, x9, [x3] + + rbit RHASH.16b, RHASH.16b + +.Lgcm_dec_ret: + /* store new MAC */ + st1 {RHASH.2d}, [x5] + + ret +SYM_FUNC_END(sm4_ce_pmull_gcm_dec) + + .section ".rodata", "a" + .align 4 +.Lcts_permute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 + .byte 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + +.Lghash_rconst: + .quad 0x87 diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c new file mode 100644 index 0000000000000000000000000000000000000000..c450a2025ca9aff2bed43c93c784f22ac066e495 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c @@ -0,0 +1,286 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions + * as specified in rfc8998 + * https://datatracker.ietf.org/doc/html/rfc8998 + * + * Copyright (C) 2022 Tianjia Zhang + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "sm4-ce.h" + +asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc, u8 *ghash_table); +asmlinkage void pmull_ghash_update(const u8 *ghash_table, u8 *ghash, + const u8 *src, unsigned int nblocks); +asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst, + const u8 *src, u8 *iv, + unsigned int nbytes, u8 *ghash, + const u8 *ghash_table, const u8 *lengths); +asmlinkage void sm4_ce_pmull_gcm_dec(const u32 *rkey_enc, u8 *dst, + const u8 *src, u8 *iv, + unsigned int nbytes, u8 *ghash, + const u8 *ghash_table, const u8 *lengths); + +#define GHASH_BLOCK_SIZE 16 +#define GCM_IV_SIZE 12 + +struct sm4_gcm_ctx { + struct sm4_ctx key; + u8 ghash_table[16 * 4]; +}; + + +static int gcm_setkey(struct crypto_aead *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table); + + kernel_neon_end(); + return 0; +} + +static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize) +{ + switch (authsize) { + case 4: + case 8: + case 12 ... 16: + return 0; + default: + return -EINVAL; + } +} + +static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[]) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) buffer[GHASH_BLOCK_SIZE]; + u32 assoclen = req->assoclen; + struct scatter_walk walk; + unsigned int buflen = 0; + + scatterwalk_start(&walk, req->src); + + do { + u32 n = scatterwalk_clamp(&walk, assoclen); + u8 *p, *ptr; + + if (!n) { + scatterwalk_start(&walk, sg_next(walk.sg)); + n = scatterwalk_clamp(&walk, assoclen); + } + + p = ptr = scatterwalk_map(&walk); + assoclen -= n; + scatterwalk_advance(&walk, n); + + if (n + buflen < GHASH_BLOCK_SIZE) { + memcpy(&buffer[buflen], ptr, n); + buflen += n; + } else { + unsigned int nblocks; + + if (buflen) { + unsigned int l = GHASH_BLOCK_SIZE - buflen; + + memcpy(&buffer[buflen], ptr, l); + ptr += l; + n -= l; + + pmull_ghash_update(ctx->ghash_table, ghash, + buffer, 1); + } + + nblocks = n / GHASH_BLOCK_SIZE; + if (nblocks) { + pmull_ghash_update(ctx->ghash_table, ghash, + ptr, nblocks); + ptr += nblocks * GHASH_BLOCK_SIZE; + } + + buflen = n % GHASH_BLOCK_SIZE; + if (buflen) + memcpy(&buffer[0], ptr, buflen); + } + + scatterwalk_unmap(p); + scatterwalk_done(&walk, 0, assoclen); + } while (assoclen); + + /* padding with '0' */ + if (buflen) { + memset(&buffer[buflen], 0, GHASH_BLOCK_SIZE - buflen); + pmull_ghash_update(ctx->ghash_table, ghash, buffer, 1); + } +} + +static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk, + struct sm4_gcm_ctx *ctx, u8 ghash[], + void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc, + u8 *dst, const u8 *src, u8 *iv, + unsigned int nbytes, u8 *ghash, + const u8 *ghash_table, const u8 *lengths)) +{ + u8 __aligned(8) iv[SM4_BLOCK_SIZE]; + be128 __aligned(8) lengths; + int err; + + memset(ghash, 0, SM4_BLOCK_SIZE); + + lengths.a = cpu_to_be64(req->assoclen * 8); + lengths.b = cpu_to_be64(walk->total * 8); + + memcpy(iv, walk->iv, GCM_IV_SIZE); + put_unaligned_be32(2, iv + GCM_IV_SIZE); + + kernel_neon_begin(); + + if (req->assoclen) + gcm_calculate_auth_mac(req, ghash); + + do { + unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE; + const u8 *src = walk->src.virt.addr; + u8 *dst = walk->dst.virt.addr; + + if (walk->nbytes == walk->total) { + tail = 0; + + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, + walk->nbytes, ghash, + ctx->ghash_table, + (const u8 *)&lengths); + } else if (walk->nbytes - tail) { + sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv, + walk->nbytes - tail, ghash, + ctx->ghash_table, NULL); + } + + kernel_neon_end(); + + err = skcipher_walk_done(walk, tail); + if (err) + return err; + if (walk->nbytes) + kernel_neon_begin(); + } while (walk->nbytes > 0); + + return 0; +} + +static int gcm_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_aead_encrypt(&walk, req, false); + if (err) + return err; + + err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc); + if (err) + return err; + + /* copy authtag to end of dst */ + scatterwalk_map_and_copy(ghash, req->dst, req->assoclen + req->cryptlen, + crypto_aead_authsize(aead), 1); + + return 0; +} + +static int gcm_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + unsigned int authsize = crypto_aead_authsize(aead); + struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead); + u8 __aligned(8) ghash[SM4_BLOCK_SIZE]; + u8 authtag[SM4_BLOCK_SIZE]; + struct skcipher_walk walk; + int err; + + err = skcipher_walk_aead_decrypt(&walk, req, false); + if (err) + return err; + + err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec); + if (err) + return err; + + /* compare calculated auth tag with the stored one */ + scatterwalk_map_and_copy(authtag, req->src, + req->assoclen + req->cryptlen - authsize, + authsize, 0); + + if (crypto_memneq(authtag, ghash, authsize)) + return -EBADMSG; + + return 0; +} + +static struct aead_alg sm4_gcm_alg = { + .base = { + .cra_name = "gcm(sm4)", + .cra_driver_name = "gcm-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_gcm_ctx), + .cra_module = THIS_MODULE, + }, + .ivsize = GCM_IV_SIZE, + .chunksize = SM4_BLOCK_SIZE, + .maxauthsize = SM4_BLOCK_SIZE, + .setkey = gcm_setkey, + .setauthsize = gcm_setauthsize, + .encrypt = gcm_encrypt, + .decrypt = gcm_decrypt, +}; + +static int __init sm4_ce_gcm_init(void) +{ + if (!cpu_have_named_feature(PMULL)) + return -ENODEV; + + return crypto_register_aead(&sm4_gcm_alg); +} + +static void __exit sm4_ce_gcm_exit(void) +{ + crypto_unregister_aead(&sm4_gcm_alg); +} + +static const struct cpu_feature __maybe_unused sm4_ce_gcm_cpu_feature[] = { + { cpu_feature(PMULL) }, + {} +}; +MODULE_DEVICE_TABLE(cpu, sm4_ce_gcm_cpu_feature); + +module_cpu_feature_match(SM4, sm4_ce_gcm_init); +module_exit(sm4_ce_gcm_exit); + +MODULE_DESCRIPTION("Synchronous SM4 in GCM mode using ARMv8 Crypto Extensions"); +MODULE_ALIAS_CRYPTO("gcm(sm4)"); +MODULE_AUTHOR("Tianjia Zhang "); +MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c index 496d55c0d01a4617ca3c014382ad3e9ea246f866..0a2d32ed3bdec4504ef3546fd1b356431a83a088 100644 --- a/arch/arm64/crypto/sm4-ce-glue.c +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -14,8 +14,12 @@ #include #include #include +#include #include #include +#include +#include +#include #include #define BYTES2BLKS(nbytes) ((nbytes) >> 4) @@ -26,15 +30,48 @@ asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src); asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src, unsigned int nblks); asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); + u8 *iv, unsigned int nblocks); asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); + u8 *iv, unsigned int nblocks); +asmlinkage void sm4_ce_cbc_cts_enc(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes); +asmlinkage void sm4_ce_cbc_cts_dec(const u32 *rkey, u8 *dst, const u8 *src, + u8 *iv, unsigned int nbytes); asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src, u8 *iv, unsigned int nblks); +asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src, + u8 *tweak, unsigned int nbytes, + const u32 *rkey2_enc); +asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src, + u8 *tweak, unsigned int nbytes, + const u32 *rkey2_enc); +asmlinkage void sm4_ce_mac_update(const u32 *rkey_enc, u8 *digest, + const u8 *src, unsigned int nblocks, + bool enc_before, bool enc_after); + +EXPORT_SYMBOL(sm4_ce_expand_key); +EXPORT_SYMBOL(sm4_ce_crypt_block); +EXPORT_SYMBOL(sm4_ce_cbc_enc); +EXPORT_SYMBOL(sm4_ce_cfb_enc); + +struct sm4_xts_ctx { + struct sm4_ctx key1; + struct sm4_ctx key2; +}; + +struct sm4_mac_tfm_ctx { + struct sm4_ctx key; + u8 __aligned(8) consts[]; +}; + +struct sm4_mac_desc_ctx { + unsigned int len; + u8 digest[SM4_BLOCK_SIZE]; +}; static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) @@ -44,8 +81,33 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, if (key_len != SM4_KEY_SIZE) return -EINVAL; + kernel_neon_begin(); sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); + return 0; +} + +static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int ret; + + if (key_len != SM4_KEY_SIZE * 2) + return -EINVAL; + + ret = xts_verify_key(tfm, key, key_len); + if (ret) + return ret; + + kernel_neon_begin(); + sm4_ce_expand_key(key, ctx->key1.rkey_enc, + ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc, + ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); + return 0; } @@ -94,66 +156,128 @@ static int sm4_ecb_decrypt(struct skcipher_request *req) return sm4_ecb_do_crypt(req, ctx->rkey_dec); } -static int sm4_cbc_encrypt(struct skcipher_request *req) +static int sm4_cbc_crypt(struct skcipher_request *req, + struct sm4_ctx *ctx, bool encrypt) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; unsigned int nbytes; int err; err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; - } + if (encrypt) + sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, + walk.iv, nblocks); + else + sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); - kernel_neon_end(); + kernel_neon_end(); + } - err = skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); } return err; } +static int sm4_cbc_encrypt(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_cbc_crypt(req, ctx, true); +} + static int sm4_cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + + return sm4_cbc_crypt(req, ctx, false); +} + +static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm); + struct scatterlist *src = req->src; + struct scatterlist *dst = req->dst; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; struct skcipher_walk walk; - unsigned int nbytes; + int cbc_blocks; int err; - err = skcipher_walk_virt(&walk, req, false); + if (req->cryptlen < SM4_BLOCK_SIZE) + return -EINVAL; - while ((nbytes = walk.nbytes) > 0) { - const u8 *src = walk.src.virt.addr; - u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + if (req->cryptlen == SM4_BLOCK_SIZE) + return sm4_cbc_crypt(req, ctx, encrypt); - kernel_neon_begin(); + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, skcipher_request_flags(req), + NULL, NULL); - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; - } + /* handle the CBC cryption part */ + cbc_blocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2; + if (cbc_blocks) { + skcipher_request_set_crypt(&subreq, src, dst, + cbc_blocks * SM4_BLOCK_SIZE, + req->iv); - kernel_neon_end(); + err = sm4_cbc_crypt(&subreq, ctx, encrypt); + if (err) + return err; - err = skcipher_walk_done(&walk, nbytes); + dst = src = scatterwalk_ffwd(sg_src, src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, + subreq.cryptlen); } - return err; + /* handle ciphertext stealing */ + skcipher_request_set_crypt(&subreq, src, dst, + req->cryptlen - cbc_blocks * SM4_BLOCK_SIZE, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_neon_begin(); + + if (encrypt) + sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + else + sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes); + + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int sm4_cbc_cts_encrypt(struct skcipher_request *req) +{ + return sm4_cbc_cts_crypt(req, true); +} + +static int sm4_cbc_cts_decrypt(struct skcipher_request *req) +{ + return sm4_cbc_cts_crypt(req, false); } static int sm4_cfb_encrypt(struct skcipher_request *req) @@ -283,6 +407,111 @@ static int sm4_ctr_crypt(struct skcipher_request *req) return err; } +static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm); + int tail = req->cryptlen % SM4_BLOCK_SIZE; + const u32 *rkey2_enc = ctx->key2.rkey_enc; + struct scatterlist sg_src[2], sg_dst[2]; + struct skcipher_request subreq; + struct scatterlist *src, *dst; + struct skcipher_walk walk; + unsigned int nbytes; + int err; + + if (req->cryptlen < SM4_BLOCK_SIZE) + return -EINVAL; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + if (unlikely(tail > 0 && walk.nbytes < walk.total)) { + int nblocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2; + + skcipher_walk_abort(&walk); + + skcipher_request_set_tfm(&subreq, tfm); + skcipher_request_set_callback(&subreq, + skcipher_request_flags(req), + NULL, NULL); + skcipher_request_set_crypt(&subreq, req->src, req->dst, + nblocks * SM4_BLOCK_SIZE, req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + } else { + tail = 0; + } + + while ((nbytes = walk.nbytes) >= SM4_BLOCK_SIZE) { + if (nbytes < walk.total) + nbytes &= ~(SM4_BLOCK_SIZE - 1); + + kernel_neon_begin(); + + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, nbytes, + rkey2_enc); + + kernel_neon_end(); + + rkey2_enc = NULL; + + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + if (err) + return err; + } + + if (likely(tail == 0)) + return 0; + + /* handle ciphertext stealing */ + + dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen); + if (req->dst != req->src) + dst = scatterwalk_ffwd(sg_dst, req->dst, subreq.cryptlen); + + skcipher_request_set_crypt(&subreq, src, dst, SM4_BLOCK_SIZE + tail, + req->iv); + + err = skcipher_walk_virt(&walk, &subreq, false); + if (err) + return err; + + kernel_neon_begin(); + + if (encrypt) + sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + else + sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr, + walk.src.virt.addr, walk.iv, walk.nbytes, + rkey2_enc); + + kernel_neon_end(); + + return skcipher_walk_done(&walk, 0); +} + +static int sm4_xts_encrypt(struct skcipher_request *req) +{ + return sm4_xts_crypt(req, true); +} + +static int sm4_xts_decrypt(struct skcipher_request *req) +{ + return sm4_xts_crypt(req, false); +} + static struct skcipher_alg sm4_algs[] = { { .base = { @@ -345,28 +574,312 @@ static struct skcipher_alg sm4_algs[] = { .setkey = sm4_setkey, .encrypt = sm4_ctr_crypt, .decrypt = sm4_ctr_crypt, + }, { + .base = { + .cra_name = "cts(cbc(sm4))", + .cra_driver_name = "cts-cbc-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE, + .max_keysize = SM4_KEY_SIZE, + .ivsize = SM4_BLOCK_SIZE, + .walksize = SM4_BLOCK_SIZE * 2, + .setkey = sm4_setkey, + .encrypt = sm4_cbc_cts_encrypt, + .decrypt = sm4_cbc_cts_decrypt, + }, { + .base = { + .cra_name = "xts(sm4)", + .cra_driver_name = "xts-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_xts_ctx), + .cra_module = THIS_MODULE, + }, + .min_keysize = SM4_KEY_SIZE * 2, + .max_keysize = SM4_KEY_SIZE * 2, + .ivsize = SM4_BLOCK_SIZE, + .walksize = SM4_BLOCK_SIZE * 2, + .setkey = sm4_xts_setkey, + .encrypt = sm4_xts_encrypt, + .decrypt = sm4_xts_decrypt, + } +}; + +static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + kernel_neon_end(); + + return 0; +} + +static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + be128 *consts = (be128 *)ctx->consts; + u64 a, b; + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + memset(consts, 0, SM4_BLOCK_SIZE); + + kernel_neon_begin(); + + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + + /* encrypt the zero block */ + sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts); + + kernel_neon_end(); + + /* gf(2^128) multiply zero-ciphertext with u and u^2 */ + a = be64_to_cpu(consts[0].a); + b = be64_to_cpu(consts[0].b); + consts[0].a = cpu_to_be64((a << 1) | (b >> 63)); + consts[0].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0)); + + a = be64_to_cpu(consts[0].a); + b = be64_to_cpu(consts[0].b); + consts[1].a = cpu_to_be64((a << 1) | (b >> 63)); + consts[1].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0)); + + return 0; +} + +static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int key_len) +{ + struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm); + u8 __aligned(8) key2[SM4_BLOCK_SIZE]; + static u8 const ks[3][SM4_BLOCK_SIZE] = { + { [0 ... SM4_BLOCK_SIZE - 1] = 0x1}, + { [0 ... SM4_BLOCK_SIZE - 1] = 0x2}, + { [0 ... SM4_BLOCK_SIZE - 1] = 0x3}, + }; + + if (key_len != SM4_KEY_SIZE) + return -EINVAL; + + kernel_neon_begin(); + + sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + + sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]); + sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2); + + sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec, + crypto_sm4_fk, crypto_sm4_ck); + + kernel_neon_end(); + + return 0; +} + +static int sm4_mac_init(struct shash_desc *desc) +{ + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + + memset(ctx->digest, 0, SM4_BLOCK_SIZE); + ctx->len = 0; + + return 0; +} + +static int sm4_mac_update(struct shash_desc *desc, const u8 *p, + unsigned int len) +{ + struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + unsigned int l, nblocks; + + if (len == 0) + return 0; + + if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) { + l = min(len, SM4_BLOCK_SIZE - ctx->len); + + crypto_xor(ctx->digest + ctx->len, p, l); + ctx->len += l; + len -= l; + p += l; + } + + if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) { + kernel_neon_begin(); + + if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) { + sm4_ce_crypt_block(tctx->key.rkey_enc, + ctx->digest, ctx->digest); + ctx->len = 0; + } else { + nblocks = len / SM4_BLOCK_SIZE; + len %= SM4_BLOCK_SIZE; + + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p, + nblocks, (ctx->len == SM4_BLOCK_SIZE), + (len != 0)); + + p += nblocks * SM4_BLOCK_SIZE; + + if (len == 0) + ctx->len = SM4_BLOCK_SIZE; + } + + kernel_neon_end(); + + if (len) { + crypto_xor(ctx->digest, p, len); + ctx->len = len; + } + } + + return 0; +} + +static int sm4_cmac_final(struct shash_desc *desc, u8 *out) +{ + struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + const u8 *consts = tctx->consts; + + if (ctx->len != SM4_BLOCK_SIZE) { + ctx->digest[ctx->len] ^= 0x80; + consts += SM4_BLOCK_SIZE; + } + + kernel_neon_begin(); + sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1, + false, true); + kernel_neon_end(); + + memcpy(out, ctx->digest, SM4_BLOCK_SIZE); + + return 0; +} + +static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out) +{ + struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm); + struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc); + + if (ctx->len) { + kernel_neon_begin(); + sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest, + ctx->digest); + kernel_neon_end(); + } + + memcpy(out, ctx->digest, SM4_BLOCK_SIZE); + + return 0; +} + +static struct shash_alg sm4_mac_algs[] = { + { + .base = { + .cra_name = "cmac(sm4)", + .cra_driver_name = "cmac-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx) + + SM4_BLOCK_SIZE * 2, + .cra_module = THIS_MODULE, + }, + .digestsize = SM4_BLOCK_SIZE, + .init = sm4_mac_init, + .update = sm4_mac_update, + .final = sm4_cmac_final, + .setkey = sm4_cmac_setkey, + .descsize = sizeof(struct sm4_mac_desc_ctx), + }, { + .base = { + .cra_name = "xcbc(sm4)", + .cra_driver_name = "xcbc-sm4-ce", + .cra_priority = 400, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx) + + SM4_BLOCK_SIZE * 2, + .cra_module = THIS_MODULE, + }, + .digestsize = SM4_BLOCK_SIZE, + .init = sm4_mac_init, + .update = sm4_mac_update, + .final = sm4_cmac_final, + .setkey = sm4_xcbc_setkey, + .descsize = sizeof(struct sm4_mac_desc_ctx), + }, { + .base = { + .cra_name = "cbcmac(sm4)", + .cra_driver_name = "cbcmac-sm4-ce", + .cra_priority = 400, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx), + .cra_module = THIS_MODULE, + }, + .digestsize = SM4_BLOCK_SIZE, + .init = sm4_mac_init, + .update = sm4_mac_update, + .final = sm4_cbcmac_final, + .setkey = sm4_cbcmac_setkey, + .descsize = sizeof(struct sm4_mac_desc_ctx), } }; static int __init sm4_init(void) { - return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); + int err; + + err = crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); + if (err) + return err; + + err = crypto_register_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs)); + if (err) + goto out_err; + + return 0; + +out_err: + crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); + return err; } static void __exit sm4_exit(void) { + crypto_unregister_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs)); crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs)); } module_cpu_feature_match(SM4, sm4_init); module_exit(sm4_exit); -MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions"); +MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR/XTS using ARMv8 Crypto Extensions"); MODULE_ALIAS_CRYPTO("sm4-ce"); MODULE_ALIAS_CRYPTO("sm4"); MODULE_ALIAS_CRYPTO("ecb(sm4)"); MODULE_ALIAS_CRYPTO("cbc(sm4)"); MODULE_ALIAS_CRYPTO("cfb(sm4)"); MODULE_ALIAS_CRYPTO("ctr(sm4)"); +MODULE_ALIAS_CRYPTO("cts(cbc(sm4))"); +MODULE_ALIAS_CRYPTO("xts(sm4)"); +MODULE_ALIAS_CRYPTO("cmac(sm4)"); +MODULE_ALIAS_CRYPTO("xcbc(sm4)"); +MODULE_ALIAS_CRYPTO("cbcmac(sm4)"); MODULE_AUTHOR("Tianjia Zhang "); MODULE_LICENSE("GPL v2"); diff --git a/arch/arm64/crypto/sm4-ce.h b/arch/arm64/crypto/sm4-ce.h new file mode 100644 index 0000000000000000000000000000000000000000..109c21b37590d39a7309e4aa5cadf813e454a736 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM4 common functions for Crypto Extensions + * Copyright (C) 2022 Tianjia Zhang + */ + +void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec, + const u32 *fk, const u32 *ck); + +void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src); + +void sm4_ce_cbc_enc(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); + +void sm4_ce_cfb_enc(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S index 3d5256b354d27fb2f5464fc810f595ab17b1a115..f295b4b7d70a3ee9e02bfa3c2df2ea14d3d72a8b 100644 --- a/arch/arm64/crypto/sm4-neon-core.S +++ b/arch/arm64/crypto/sm4-neon-core.S @@ -18,6 +18,11 @@ #define RTMP2 v10 #define RTMP3 v11 +#define RTMP4 v12 +#define RTMP5 v13 +#define RTMP6 v14 +#define RTMP7 v15 + #define RX0 v12 #define RX1 v13 #define RKEY v14 @@ -25,7 +30,7 @@ /* Helper macros. */ -#define PREPARE \ +#define SM4_PREPARE() \ adr_l x5, crypto_sm4_sbox; \ ld1 {v16.16b-v19.16b}, [x5], #64; \ ld1 {v20.16b-v23.16b}, [x5], #64; \ @@ -42,7 +47,25 @@ zip1 s2.2d, RTMP2.2d, RTMP3.2d; \ zip2 s3.2d, RTMP2.2d, RTMP3.2d; -#define rotate_clockwise_90(s0, s1, s2, s3) \ +#define transpose_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \ + zip1 RTMP0.4s, s0.4s, s1.4s; \ + zip1 RTMP1.4s, s2.4s, s3.4s; \ + zip2 RTMP2.4s, s0.4s, s1.4s; \ + zip2 RTMP3.4s, s2.4s, s3.4s; \ + zip1 RTMP4.4s, s4.4s, s5.4s; \ + zip1 RTMP5.4s, s6.4s, s7.4s; \ + zip2 RTMP6.4s, s4.4s, s5.4s; \ + zip2 RTMP7.4s, s6.4s, s7.4s; \ + zip1 s0.2d, RTMP0.2d, RTMP1.2d; \ + zip2 s1.2d, RTMP0.2d, RTMP1.2d; \ + zip1 s2.2d, RTMP2.2d, RTMP3.2d; \ + zip2 s3.2d, RTMP2.2d, RTMP3.2d; \ + zip1 s4.2d, RTMP4.2d, RTMP5.2d; \ + zip2 s5.2d, RTMP4.2d, RTMP5.2d; \ + zip1 s6.2d, RTMP6.2d, RTMP7.2d; \ + zip2 s7.2d, RTMP6.2d, RTMP7.2d; + +#define rotate_clockwise_4x4(s0, s1, s2, s3) \ zip1 RTMP0.4s, s1.4s, s0.4s; \ zip2 RTMP1.4s, s1.4s, s0.4s; \ zip1 RTMP2.4s, s3.4s, s2.4s; \ @@ -52,6 +75,24 @@ zip1 s2.2d, RTMP3.2d, RTMP1.2d; \ zip2 s3.2d, RTMP3.2d, RTMP1.2d; +#define rotate_clockwise_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \ + zip1 RTMP0.4s, s1.4s, s0.4s; \ + zip1 RTMP2.4s, s3.4s, s2.4s; \ + zip2 RTMP1.4s, s1.4s, s0.4s; \ + zip2 RTMP3.4s, s3.4s, s2.4s; \ + zip1 RTMP4.4s, s5.4s, s4.4s; \ + zip1 RTMP6.4s, s7.4s, s6.4s; \ + zip2 RTMP5.4s, s5.4s, s4.4s; \ + zip2 RTMP7.4s, s7.4s, s6.4s; \ + zip1 s0.2d, RTMP2.2d, RTMP0.2d; \ + zip2 s1.2d, RTMP2.2d, RTMP0.2d; \ + zip1 s2.2d, RTMP3.2d, RTMP1.2d; \ + zip2 s3.2d, RTMP3.2d, RTMP1.2d; \ + zip1 s4.2d, RTMP6.2d, RTMP4.2d; \ + zip2 s5.2d, RTMP6.2d, RTMP4.2d; \ + zip1 s6.2d, RTMP7.2d, RTMP5.2d; \ + zip2 s7.2d, RTMP7.2d, RTMP5.2d; + #define ROUND4(round, s0, s1, s2, s3) \ dup RX0.4s, RKEY.s[round]; \ /* rk ^ s1 ^ s2 ^ s3 */ \ @@ -87,14 +128,7 @@ /* s0 ^= RTMP3 */ \ eor s0.16b, s0.16b, RTMP3.16b; -#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ - rev32 b0.16b, b0.16b; \ - rev32 b1.16b, b1.16b; \ - rev32 b2.16b, b2.16b; \ - rev32 b3.16b, b3.16b; \ - \ - transpose_4x4(b0, b1, b2, b3); \ - \ +#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3) \ mov x6, 8; \ 4: \ ld1 {RKEY.4s}, [x0], #16; \ @@ -107,15 +141,23 @@ \ bne 4b; \ \ - rotate_clockwise_90(b0, b1, b2, b3); \ rev32 b0.16b, b0.16b; \ rev32 b1.16b, b1.16b; \ rev32 b2.16b, b2.16b; \ rev32 b3.16b, b3.16b; \ \ + rotate_clockwise_4x4(b0, b1, b2, b3); \ + \ /* repoint to rkey */ \ sub x0, x0, #128; +#define SM4_CRYPT_BLK4(b0, b1, b2, b3) \ + rev32 b0.16b, b0.16b; \ + rev32 b1.16b, b1.16b; \ + rev32 b2.16b, b2.16b; \ + rev32 b3.16b, b3.16b; \ + SM4_CRYPT_BLK4_BE(b0, b1, b2, b3); + #define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3) \ /* rk ^ s1 ^ s2 ^ s3 */ \ dup RX0.4s, RKEY.s[round]; \ @@ -175,7 +217,7 @@ eor s0.16b, s0.16b, RTMP0.16b; \ eor t0.16b, t0.16b, RTMP1.16b; -#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ +#define SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7) \ rev32 b0.16b, b0.16b; \ rev32 b1.16b, b1.16b; \ rev32 b2.16b, b2.16b; \ @@ -185,9 +227,6 @@ rev32 b6.16b, b6.16b; \ rev32 b7.16b, b7.16b; \ \ - transpose_4x4(b0, b1, b2, b3); \ - transpose_4x4(b4, b5, b6, b7); \ - \ mov x6, 8; \ 8: \ ld1 {RKEY.4s}, [x0], #16; \ @@ -200,8 +239,6 @@ \ bne 8b; \ \ - rotate_clockwise_90(b0, b1, b2, b3); \ - rotate_clockwise_90(b4, b5, b6, b7); \ rev32 b0.16b, b0.16b; \ rev32 b1.16b, b1.16b; \ rev32 b2.16b, b2.16b; \ @@ -214,274 +251,429 @@ /* repoint to rkey */ \ sub x0, x0, #128; +#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \ + SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7); \ + rotate_clockwise_4x4_2x(b0, b1, b2, b3, b4, b5, b6, b7); \ -.align 3 -SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4) - /* input: - * x0: round key array, CTX - * x1: dst - * x2: src - * w3: num blocks (1..4) - */ - PREPARE; - - ld1 {v0.16b}, [x2], #16; - mov v1.16b, v0.16b; - mov v2.16b, v0.16b; - mov v3.16b, v0.16b; - cmp w3, #2; - blt .Lblk4_load_input_done; - ld1 {v1.16b}, [x2], #16; - beq .Lblk4_load_input_done; - ld1 {v2.16b}, [x2], #16; - cmp w3, #3; - beq .Lblk4_load_input_done; - ld1 {v3.16b}, [x2]; - -.Lblk4_load_input_done: - SM4_CRYPT_BLK4(v0, v1, v2, v3); - - st1 {v0.16b}, [x1], #16; - cmp w3, #2; - blt .Lblk4_store_output_done; - st1 {v1.16b}, [x1], #16; - beq .Lblk4_store_output_done; - st1 {v2.16b}, [x1], #16; - cmp w3, #3; - beq .Lblk4_store_output_done; - st1 {v3.16b}, [x1]; - -.Lblk4_store_output_done: - ret; -SYM_FUNC_END(__sm4_neon_crypt_blk1_4) .align 3 -SYM_FUNC_START(sm4_neon_crypt_blk1_8) +SYM_FUNC_START(sm4_neon_crypt) /* input: * x0: round key array, CTX * x1: dst * x2: src - * w3: num blocks (1..8) + * w3: nblocks */ - cmp w3, #5; - blt __sm4_neon_crypt_blk1_4; - - PREPARE; - - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b}, [x2], #16; - mov v5.16b, v4.16b; - mov v6.16b, v4.16b; - mov v7.16b, v4.16b; - beq .Lblk8_load_input_done; - ld1 {v5.16b}, [x2], #16; - cmp w3, #7; - blt .Lblk8_load_input_done; - ld1 {v6.16b}, [x2], #16; - beq .Lblk8_load_input_done; - ld1 {v7.16b}, [x2]; - -.Lblk8_load_input_done: - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); - - cmp w3, #6; - st1 {v0.16b-v3.16b}, [x1], #64; - st1 {v4.16b}, [x1], #16; - blt .Lblk8_store_output_done; - st1 {v5.16b}, [x1], #16; - beq .Lblk8_store_output_done; - st1 {v6.16b}, [x1], #16; - cmp w3, #7; - beq .Lblk8_store_output_done; - st1 {v7.16b}, [x1]; - -.Lblk8_store_output_done: - ret; -SYM_FUNC_END(sm4_neon_crypt_blk1_8) + SM4_PREPARE() -.align 3 -SYM_FUNC_START(sm4_neon_crypt_blk8) - /* input: - * x0: round key array, CTX - * x1: dst - * x2: src - * w3: nblocks (multiples of 8) - */ - PREPARE; +.Lcrypt_loop_8x: + sub w3, w3, #8 + tbnz w3, #31, .Lcrypt_4x + + ld4 {v0.4s-v3.4s}, [x2], #64 + ld4 {v4.4s-v7.4s}, [x2], #64 -.Lcrypt_loop_blk: - subs w3, w3, #8; - bmi .Lcrypt_end; + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b-v7.16b}, [x2], #64; + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + cbz w3, .Lcrypt_end + b .Lcrypt_loop_8x - st1 {v0.16b-v3.16b}, [x1], #64; - st1 {v4.16b-v7.16b}, [x1], #64; +.Lcrypt_4x: + add w3, w3, #8 + cmp w3, #4 + blt .Lcrypt_tail - b .Lcrypt_loop_blk; + sub w3, w3, #4 + + ld4 {v0.4s-v3.4s}, [x2], #64 + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + st1 {v0.16b-v3.16b}, [x1], #64 + + cbz w3, .Lcrypt_end + +.Lcrypt_tail: + cmp w3, #2 + ld1 {v0.16b}, [x2], #16 + blt .Lcrypt_tail_load_done + ld1 {v1.16b}, [x2], #16 + beq .Lcrypt_tail_load_done + ld1 {v2.16b}, [x2], #16 + +.Lcrypt_tail_load_done: + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + cmp w3, #2 + st1 {v0.16b}, [x1], #16 + blt .Lcrypt_end + st1 {v1.16b}, [x1], #16 + beq .Lcrypt_end + st1 {v2.16b}, [x1], #16 .Lcrypt_end: - ret; -SYM_FUNC_END(sm4_neon_crypt_blk8) + ret +SYM_FUNC_END(sm4_neon_crypt) .align 3 -SYM_FUNC_START(sm4_neon_cbc_dec_blk8) +SYM_FUNC_START(sm4_neon_cbc_dec) /* input: * x0: round key array, CTX * x1: dst * x2: src * x3: iv (big endian, 128 bit) - * w4: nblocks (multiples of 8) + * w4: nblocks */ - PREPARE; + SM4_PREPARE() + + ld1 {RIV.16b}, [x3] + +.Lcbc_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcbc_dec_4x + + ld4 {v0.4s-v3.4s}, [x2], #64 + ld4 {v4.4s-v7.4s}, [x2] + + SM4_CRYPT_BLK8_norotate(v0, v1, v2, v3, v4, v5, v6, v7) + + /* Avoid overwriting the RIV register */ + rotate_clockwise_4x4(v0, v1, v2, v3) + rotate_clockwise_4x4(v4, v5, v6, v7) + + sub x2, x2, #64 + + eor v0.16b, v0.16b, RIV.16b - ld1 {RIV.16b}, [x3]; + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64 -.Lcbc_loop_blk: - subs w4, w4, #8; - bmi .Lcbc_end; + eor v1.16b, v1.16b, RTMP0.16b + eor v2.16b, v2.16b, RTMP1.16b + eor v3.16b, v3.16b, RTMP2.16b + eor v4.16b, v4.16b, RTMP3.16b + eor v5.16b, v5.16b, RTMP4.16b + eor v6.16b, v6.16b, RTMP5.16b + eor v7.16b, v7.16b, RTMP6.16b - ld1 {v0.16b-v3.16b}, [x2], #64; - ld1 {v4.16b-v7.16b}, [x2]; + mov RIV.16b, RTMP7.16b - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 - sub x2, x2, #64; - eor v0.16b, v0.16b, RIV.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v1.16b, v1.16b, RTMP0.16b; - eor v2.16b, v2.16b, RTMP1.16b; - eor v3.16b, v3.16b, RTMP2.16b; - st1 {v0.16b-v3.16b}, [x1], #64; + cbz w4, .Lcbc_dec_end + b .Lcbc_dec_loop_8x - eor v4.16b, v4.16b, RTMP3.16b; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v5.16b, v5.16b, RTMP0.16b; - eor v6.16b, v6.16b, RTMP1.16b; - eor v7.16b, v7.16b, RTMP2.16b; +.Lcbc_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcbc_dec_tail - mov RIV.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; + sub w4, w4, #4 - b .Lcbc_loop_blk; + ld1 {v0.16b-v3.16b}, [x2], #64 -.Lcbc_end: + rev32 v4.16b, v0.16b + rev32 v5.16b, v1.16b + rev32 v6.16b, v2.16b + rev32 v7.16b, v3.16b + + transpose_4x4(v4, v5, v6, v7) + + SM4_CRYPT_BLK4_BE(v4, v5, v6, v7) + + eor v4.16b, v4.16b, RIV.16b + eor v5.16b, v5.16b, v0.16b + eor v6.16b, v6.16b, v1.16b + eor v7.16b, v7.16b, v2.16b + + mov RIV.16b, v3.16b + + st1 {v4.16b-v7.16b}, [x1], #64 + + cbz w4, .Lcbc_dec_end + +.Lcbc_dec_tail: + cmp w4, #2 + ld1 {v0.16b}, [x2], #16 + blt .Lcbc_dec_tail_load_done + ld1 {v1.16b}, [x2], #16 + beq .Lcbc_dec_tail_load_done + ld1 {v2.16b}, [x2], #16 + +.Lcbc_dec_tail_load_done: + rev32 v4.16b, v0.16b + rev32 v5.16b, v1.16b + rev32 v6.16b, v2.16b + + transpose_4x4(v4, v5, v6, v7) + + SM4_CRYPT_BLK4_BE(v4, v5, v6, v7) + + cmp w4, #2 + eor v4.16b, v4.16b, RIV.16b + mov RIV.16b, v0.16b + st1 {v4.16b}, [x1], #16 + blt .Lcbc_dec_end + + eor v5.16b, v5.16b, v0.16b + mov RIV.16b, v1.16b + st1 {v5.16b}, [x1], #16 + beq .Lcbc_dec_end + + eor v6.16b, v6.16b, v1.16b + mov RIV.16b, v2.16b + st1 {v6.16b}, [x1], #16 + +.Lcbc_dec_end: /* store new IV */ - st1 {RIV.16b}, [x3]; + st1 {RIV.16b}, [x3] - ret; -SYM_FUNC_END(sm4_neon_cbc_dec_blk8) + ret +SYM_FUNC_END(sm4_neon_cbc_dec) .align 3 -SYM_FUNC_START(sm4_neon_cfb_dec_blk8) +SYM_FUNC_START(sm4_neon_cfb_dec) /* input: * x0: round key array, CTX * x1: dst * x2: src * x3: iv (big endian, 128 bit) - * w4: nblocks (multiples of 8) + * w4: nblocks */ - PREPARE; + SM4_PREPARE() + + ld1 {v0.16b}, [x3] + +.Lcfb_dec_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lcfb_dec_4x + + ld1 {v1.16b-v3.16b}, [x2], #48 + ld4 {v4.4s-v7.4s}, [x2] + + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) + + sub x2, x2, #48 + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64 + + eor v0.16b, v0.16b, RTMP0.16b + eor v1.16b, v1.16b, RTMP1.16b + eor v2.16b, v2.16b, RTMP2.16b + eor v3.16b, v3.16b, RTMP3.16b + eor v4.16b, v4.16b, RTMP4.16b + eor v5.16b, v5.16b, RTMP5.16b + eor v6.16b, v6.16b, RTMP6.16b + eor v7.16b, v7.16b, RTMP7.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + mov v0.16b, RTMP7.16b + + cbz w4, .Lcfb_dec_end + b .Lcfb_dec_loop_8x + +.Lcfb_dec_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lcfb_dec_tail + + sub w4, w4, #4 + + ld1 {v4.16b-v7.16b}, [x2], #64 + + rev32 v0.16b, v0.16b /* v0 is IV register */ + rev32 v1.16b, v4.16b + rev32 v2.16b, v5.16b + rev32 v3.16b, v6.16b + + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4_BE(v0, v1, v2, v3) - ld1 {v0.16b}, [x3]; + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + eor v3.16b, v3.16b, v7.16b -.Lcfb_loop_blk: - subs w4, w4, #8; - bmi .Lcfb_end; + st1 {v0.16b-v3.16b}, [x1], #64 - ld1 {v1.16b, v2.16b, v3.16b}, [x2], #48; - ld1 {v4.16b-v7.16b}, [x2]; + mov v0.16b, v7.16b - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); + cbz w4, .Lcfb_dec_end - sub x2, x2, #48; - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; +.Lcfb_dec_tail: + cmp w4, #2 + ld1 {v4.16b}, [x2], #16 + blt .Lcfb_dec_tail_load_done + ld1 {v5.16b}, [x2], #16 + beq .Lcfb_dec_tail_load_done + ld1 {v6.16b}, [x2], #16 - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; +.Lcfb_dec_tail_load_done: + rev32 v0.16b, v0.16b /* v0 is IV register */ + rev32 v1.16b, v4.16b + rev32 v2.16b, v5.16b - mov v0.16b, RTMP3.16b; + transpose_4x4(v0, v1, v2, v3) - b .Lcfb_loop_blk; + SM4_CRYPT_BLK4_BE(v0, v1, v2, v3) -.Lcfb_end: + cmp w4, #2 + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x1], #16 + mov v0.16b, v4.16b + blt .Lcfb_dec_end + + eor v1.16b, v1.16b, v5.16b + st1 {v1.16b}, [x1], #16 + mov v0.16b, v5.16b + beq .Lcfb_dec_end + + eor v2.16b, v2.16b, v6.16b + st1 {v2.16b}, [x1], #16 + mov v0.16b, v6.16b + +.Lcfb_dec_end: /* store new IV */ - st1 {v0.16b}, [x3]; + st1 {v0.16b}, [x3] - ret; -SYM_FUNC_END(sm4_neon_cfb_dec_blk8) + ret +SYM_FUNC_END(sm4_neon_cfb_dec) .align 3 -SYM_FUNC_START(sm4_neon_ctr_enc_blk8) +SYM_FUNC_START(sm4_neon_ctr_crypt) /* input: * x0: round key array, CTX * x1: dst * x2: src * x3: ctr (big endian, 128 bit) - * w4: nblocks (multiples of 8) + * w4: nblocks */ - PREPARE; + SM4_PREPARE() - ldp x7, x8, [x3]; - rev x7, x7; - rev x8, x8; + ldp x7, x8, [x3] + rev x7, x7 + rev x8, x8 -.Lctr_loop_blk: - subs w4, w4, #8; - bmi .Lctr_end; +.Lctr_crypt_loop_8x: + sub w4, w4, #8 + tbnz w4, #31, .Lctr_crypt_4x -#define inc_le128(vctr) \ - mov vctr.d[1], x8; \ - mov vctr.d[0], x7; \ - adds x8, x8, #1; \ - adc x7, x7, xzr; \ - rev64 vctr.16b, vctr.16b; +#define inc_le128(vctr) \ + mov vctr.d[1], x8; \ + mov vctr.d[0], x7; \ + adds x8, x8, #1; \ + rev64 vctr.16b, vctr.16b; \ + adc x7, x7, xzr; /* construct CTRs */ - inc_le128(v0); /* +0 */ - inc_le128(v1); /* +1 */ - inc_le128(v2); /* +2 */ - inc_le128(v3); /* +3 */ - inc_le128(v4); /* +4 */ - inc_le128(v5); /* +5 */ - inc_le128(v6); /* +6 */ - inc_le128(v7); /* +7 */ - - SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7); - - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v0.16b, v0.16b, RTMP0.16b; - eor v1.16b, v1.16b, RTMP1.16b; - eor v2.16b, v2.16b, RTMP2.16b; - eor v3.16b, v3.16b, RTMP3.16b; - st1 {v0.16b-v3.16b}, [x1], #64; - - ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64; - eor v4.16b, v4.16b, RTMP0.16b; - eor v5.16b, v5.16b, RTMP1.16b; - eor v6.16b, v6.16b, RTMP2.16b; - eor v7.16b, v7.16b, RTMP3.16b; - st1 {v4.16b-v7.16b}, [x1], #64; - - b .Lctr_loop_blk; - -.Lctr_end: + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ + inc_le128(v4) /* +4 */ + inc_le128(v5) /* +5 */ + inc_le128(v6) /* +6 */ + inc_le128(v7) /* +7 */ + + transpose_4x4_2x(v0, v1, v2, v3, v4, v5, v6, v7) + + SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7) + + ld1 {RTMP0.16b-RTMP3.16b}, [x2], #64 + ld1 {RTMP4.16b-RTMP7.16b}, [x2], #64 + + eor v0.16b, v0.16b, RTMP0.16b + eor v1.16b, v1.16b, RTMP1.16b + eor v2.16b, v2.16b, RTMP2.16b + eor v3.16b, v3.16b, RTMP3.16b + eor v4.16b, v4.16b, RTMP4.16b + eor v5.16b, v5.16b, RTMP5.16b + eor v6.16b, v6.16b, RTMP6.16b + eor v7.16b, v7.16b, RTMP7.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + st1 {v4.16b-v7.16b}, [x1], #64 + + cbz w4, .Lctr_crypt_end + b .Lctr_crypt_loop_8x + +.Lctr_crypt_4x: + add w4, w4, #8 + cmp w4, #4 + blt .Lctr_crypt_tail + + sub w4, w4, #4 + + /* construct CTRs */ + inc_le128(v0) /* +0 */ + inc_le128(v1) /* +1 */ + inc_le128(v2) /* +2 */ + inc_le128(v3) /* +3 */ + + ld1 {v4.16b-v7.16b}, [x2], #64 + + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + eor v0.16b, v0.16b, v4.16b + eor v1.16b, v1.16b, v5.16b + eor v2.16b, v2.16b, v6.16b + eor v3.16b, v3.16b, v7.16b + + st1 {v0.16b-v3.16b}, [x1], #64 + + cbz w4, .Lctr_crypt_end + +.Lctr_crypt_tail: + /* inc_le128 will change the sign bit */ + ld1 {v4.16b}, [x2], #16 + inc_le128(v0) + cmp w4, #2 + blt .Lctr_crypt_tail_load_done + + ld1 {v5.16b}, [x2], #16 + inc_le128(v1) + cmp w4, #2 + beq .Lctr_crypt_tail_load_done + + ld1 {v6.16b}, [x2], #16 + inc_le128(v2) + +.Lctr_crypt_tail_load_done: + transpose_4x4(v0, v1, v2, v3) + + SM4_CRYPT_BLK4(v0, v1, v2, v3) + + cmp w4, #2 + + eor v0.16b, v0.16b, v4.16b + st1 {v0.16b}, [x1], #16 + blt .Lctr_crypt_end + + eor v1.16b, v1.16b, v5.16b + st1 {v1.16b}, [x1], #16 + beq .Lctr_crypt_end + + eor v2.16b, v2.16b, v6.16b + st1 {v2.16b}, [x1], #16 + +.Lctr_crypt_end: /* store new CTR */ - rev x7, x7; - rev x8, x8; - stp x7, x8, [x3]; + rev x7, x7 + rev x8, x8 + stp x7, x8, [x3] - ret; -SYM_FUNC_END(sm4_neon_ctr_enc_blk8) + ret +SYM_FUNC_END(sm4_neon_ctr_crypt) diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c index 03a6a6866a3112f0dc10dc2fc119f7d53c867a1d..7b19accf5c0374d2db5eee4b1ebe3632cd3c8410 100644 --- a/arch/arm64/crypto/sm4-neon-glue.c +++ b/arch/arm64/crypto/sm4-neon-glue.c @@ -18,19 +18,14 @@ #include #include -#define BYTES2BLKS(nbytes) ((nbytes) >> 4) -#define BYTES2BLK8(nbytes) (((nbytes) >> 4) & ~(8 - 1)) - -asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src, - unsigned int nblks); -asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src, - unsigned int nblks); -asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); -asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); -asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src, - u8 *iv, unsigned int nblks); +asmlinkage void sm4_neon_crypt(const u32 *rkey, u8 *dst, const u8 *src, + unsigned int nblocks); +asmlinkage void sm4_neon_cbc_dec(const u32 *rkey_dec, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); +asmlinkage void sm4_neon_cfb_dec(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); +asmlinkage void sm4_neon_ctr_crypt(const u32 *rkey_enc, u8 *dst, const u8 *src, + u8 *iv, unsigned int nblocks); static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) @@ -51,27 +46,18 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_crypt_blk8(rkey, dst, src, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } + sm4_neon_crypt(rkey, dst, src, nblocks); - nblks = BYTES2BLKS(nbytes); - if (nblks) { - sm4_neon_crypt_blk1_8(rkey, dst, src, nblks); - nbytes -= nblks * SM4_BLOCK_SIZE; + kernel_neon_end(); } - kernel_neon_end(); - - err = skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); } return err; @@ -138,48 +124,19 @@ static int sm4_cbc_decrypt(struct skcipher_request *req) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src, - walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } + sm4_neon_cbc_dec(ctx->rkey_dec, dst, src, + walk.iv, nblocks); - nblks = BYTES2BLKS(nbytes); - if (nblks) { - u8 keystream[SM4_BLOCK_SIZE * 8]; - u8 iv[SM4_BLOCK_SIZE]; - int i; - - sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream, - src, nblks); - - src += ((int)nblks - 2) * SM4_BLOCK_SIZE; - dst += (nblks - 1) * SM4_BLOCK_SIZE; - memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE); - - for (i = nblks - 1; i > 0; i--) { - crypto_xor_cpy(dst, src, - &keystream[i * SM4_BLOCK_SIZE], - SM4_BLOCK_SIZE); - src -= SM4_BLOCK_SIZE; - dst -= SM4_BLOCK_SIZE; - } - crypto_xor_cpy(dst, walk.iv, - keystream, SM4_BLOCK_SIZE); - memcpy(walk.iv, iv, SM4_BLOCK_SIZE); - nbytes -= nblks * SM4_BLOCK_SIZE; + kernel_neon_end(); } - kernel_neon_end(); - - err = skcipher_walk_done(&walk, nbytes); + err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE); } return err; @@ -238,41 +195,21 @@ static int sm4_cfb_decrypt(struct skcipher_request *req) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src, - walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } + sm4_neon_cfb_dec(ctx->rkey_enc, dst, src, + walk.iv, nblocks); - nblks = BYTES2BLKS(nbytes); - if (nblks) { - u8 keystream[SM4_BLOCK_SIZE * 8]; - - memcpy(keystream, walk.iv, SM4_BLOCK_SIZE); - if (nblks > 1) - memcpy(&keystream[SM4_BLOCK_SIZE], src, - (nblks - 1) * SM4_BLOCK_SIZE); - memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE, - SM4_BLOCK_SIZE); - - sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, - keystream, nblks); - - crypto_xor_cpy(dst, src, keystream, - nblks * SM4_BLOCK_SIZE); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } + kernel_neon_end(); - kernel_neon_end(); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } /* tail */ if (walk.nbytes == walk.total && nbytes > 0) { @@ -302,40 +239,21 @@ static int sm4_ctr_crypt(struct skcipher_request *req) while ((nbytes = walk.nbytes) > 0) { const u8 *src = walk.src.virt.addr; u8 *dst = walk.dst.virt.addr; - unsigned int nblks; + unsigned int nblocks; - kernel_neon_begin(); + nblocks = nbytes / SM4_BLOCK_SIZE; + if (nblocks) { + kernel_neon_begin(); - nblks = BYTES2BLK8(nbytes); - if (nblks) { - sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src, - walk.iv, nblks); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } + sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src, + walk.iv, nblocks); - nblks = BYTES2BLKS(nbytes); - if (nblks) { - u8 keystream[SM4_BLOCK_SIZE * 8]; - int i; - - for (i = 0; i < nblks; i++) { - memcpy(&keystream[i * SM4_BLOCK_SIZE], - walk.iv, SM4_BLOCK_SIZE); - crypto_inc(walk.iv, SM4_BLOCK_SIZE); - } - sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream, - keystream, nblks); - - crypto_xor_cpy(dst, src, keystream, - nblks * SM4_BLOCK_SIZE); - dst += nblks * SM4_BLOCK_SIZE; - src += nblks * SM4_BLOCK_SIZE; - nbytes -= nblks * SM4_BLOCK_SIZE; - } + kernel_neon_end(); - kernel_neon_end(); + dst += nblocks * SM4_BLOCK_SIZE; + src += nblocks * SM4_BLOCK_SIZE; + nbytes -= nblocks * SM4_BLOCK_SIZE; + } /* tail */ if (walk.nbytes == walk.total && nbytes > 0) { diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0f43c4ff6b5d4fa93315ef3f4bc1a..0df3fc3a017371cf402efc50c32fe326a4e14711 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -135,7 +135,7 @@ * 40 bits wide (T0SZ = 24). Systems with a PARange smaller than 40 bits are * not known to exist and will break with this configuration. * - * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2(). + * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu. * * Note that when using 4K pages, we concatenate two first level page tables * together. With 16K pages, we concatenate 16 first level page tables. @@ -340,9 +340,13 @@ * We have * PAR [PA_Shift - 1 : 12] = PA [PA_Shift - 1 : 12] * HPFAR [PA_Shift - 9 : 4] = FIPA [PA_Shift - 1 : 12] + * + * Always assume 52 bit PA since at this point, we don't know how many PA bits + * the page table has been set up for. This should be safe since unused address + * bits in PAR are res0. */ #define PAR_TO_HPFAR(par) \ - (((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8) + (((par) & GENMASK_ULL(52 - 1, 12)) >> 8) #define ECN(x) { ESR_ELx_EC_##x, #x } diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 53035763e48e891e6e2c0ff43c740c8a26e3d3af..43c3bc0f9544d9a3f477427315d07e6d17fafb94 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -76,6 +76,9 @@ enum __kvm_host_smccc_func { __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vm, + __KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu, + __KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] @@ -106,7 +109,7 @@ enum __kvm_host_smccc_func { #define per_cpu_ptr_nvhe_sym(sym, cpu) \ ({ \ unsigned long base, off; \ - base = kvm_arm_hyp_percpu_base[cpu]; \ + base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; \ off = (unsigned long)&CHOOSE_NVHE_SYM(sym) - \ (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start); \ base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL; \ @@ -211,7 +214,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector); #define __kvm_hyp_init CHOOSE_NVHE_SYM(__kvm_hyp_init) #define __kvm_hyp_vector CHOOSE_HYP_SYM(__kvm_hyp_vector) -extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; +extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[]; DECLARE_KVM_NVHE_SYM(__per_cpu_start); DECLARE_KVM_NVHE_SYM(__per_cpu_end); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index fd34ab155d0b7c599613b848b4fed6cff8542a00..35a159d131b5f862c16b52842ef6b0ba8088fb00 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -73,6 +73,63 @@ u32 __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu); +struct kvm_hyp_memcache { + phys_addr_t head; + unsigned long nr_pages; +}; + +static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc, + phys_addr_t *p, + phys_addr_t (*to_pa)(void *virt)) +{ + *p = mc->head; + mc->head = to_pa(p); + mc->nr_pages++; +} + +static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc, + void *(*to_va)(phys_addr_t phys)) +{ + phys_addr_t *p = to_va(mc->head); + + if (!mc->nr_pages) + return NULL; + + mc->head = *p; + mc->nr_pages--; + + return p; +} + +static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc, + unsigned long min_pages, + void *(*alloc_fn)(void *arg), + phys_addr_t (*to_pa)(void *virt), + void *arg) +{ + while (mc->nr_pages < min_pages) { + phys_addr_t *p = alloc_fn(arg); + + if (!p) + return -ENOMEM; + push_hyp_memcache(mc, p, to_pa); + } + + return 0; +} + +static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc, + void (*free_fn)(void *virt, void *arg), + void *(*to_va)(phys_addr_t phys), + void *arg) +{ + while (mc->nr_pages) + free_fn(pop_hyp_memcache(mc, to_va), arg); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc); +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages); + struct kvm_vmid { atomic64_t id; }; @@ -115,6 +172,13 @@ struct kvm_smccc_features { unsigned long vendor_hyp_bmap; }; +typedef unsigned int pkvm_handle_t; + +struct kvm_protected_vm { + pkvm_handle_t handle; + struct kvm_hyp_memcache teardown_mc; +}; + struct kvm_arch { struct kvm_s2_mmu mmu; @@ -163,9 +227,19 @@ struct kvm_arch { u8 pfr0_csv2; u8 pfr0_csv3; + struct { + u8 imp:4; + u8 unimp:4; + } dfr0_pmuver; /* Hypercall features firmware registers' descriptor */ struct kvm_smccc_features smccc_feat; + + /* + * For an untrusted host VM, 'pkvm.handle' is used to lookup + * the associated pKVM instance in the hypervisor. + */ + struct kvm_protected_vm pkvm; }; struct kvm_vcpu_fault_info { @@ -925,8 +999,6 @@ int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); - static inline bool kvm_vm_is_protected(struct kvm *kvm) { return false; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index aa7fa2a08f0604af5b25f2eb0f334f4a716b4431..6797eafe7890b596ec97b65beba1a5312143a2f9 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -123,4 +123,7 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); +extern unsigned long kvm_nvhe_sym(__icache_flags); +extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits); + #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 7784081088e78f84c7aad3aaf9553a276a6d1b8a..e4a7e636949983e7331289ba519c6fefcb594cca 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -166,7 +166,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu); +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type); void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu); int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, phys_addr_t pa, unsigned long size, bool writable); diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 3252eb50ecfe59e2df67caf19d4bf790eb22a4b8..63f81b27a4e302cede0605430ca0759d308f6f17 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -42,6 +42,8 @@ typedef u64 kvm_pte_t; #define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) +#define KVM_PHYS_INVALID (-1ULL) + static inline bool kvm_pte_valid(kvm_pte_t pte) { return pte & KVM_PTE_VALID; @@ -57,6 +59,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte) return pa; } +static inline kvm_pte_t kvm_phys_to_pte(u64 pa) +{ + kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; + + if (PAGE_SHIFT == 16) { + pa &= GENMASK(51, 48); + pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); + } + + return pte; +} + static inline u64 kvm_granule_shift(u32 level) { /* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */ @@ -85,6 +99,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level) * allocation is physically contiguous. * @free_pages_exact: Free an exact number of memory pages previously * allocated by zalloc_pages_exact. + * @free_removed_table: Free a removed paging structure by unlinking and + * dropping references. * @get_page: Increment the refcount on a page. * @put_page: Decrement the refcount on a page. When the * refcount reaches 0 the page is automatically @@ -103,6 +119,7 @@ struct kvm_pgtable_mm_ops { void* (*zalloc_page)(void *arg); void* (*zalloc_pages_exact)(size_t size); void (*free_pages_exact)(void *addr, size_t size); + void (*free_removed_table)(void *addr, u32 level); void (*get_page)(void *addr); void (*put_page)(void *addr); int (*page_count)(void *addr); @@ -161,29 +178,6 @@ enum kvm_pgtable_prot { typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end, enum kvm_pgtable_prot prot); -/** - * struct kvm_pgtable - KVM page-table. - * @ia_bits: Maximum input address size, in bits. - * @start_level: Level at which the page-table walk starts. - * @pgd: Pointer to the first top-level entry of the page-table. - * @mm_ops: Memory management callbacks. - * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. - * @flags: Stage-2 page-table flags. - * @force_pte_cb: Function that returns true if page level mappings must - * be used instead of block mappings. - */ -struct kvm_pgtable { - u32 ia_bits; - u32 start_level; - kvm_pte_t *pgd; - struct kvm_pgtable_mm_ops *mm_ops; - - /* Stage-2 only */ - struct kvm_s2_mmu *mmu; - enum kvm_pgtable_stage2_flags flags; - kvm_pgtable_force_pte_cb_t force_pte_cb; -}; - /** * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk. * @KVM_PGTABLE_WALK_LEAF: Visit leaf entries, including invalid @@ -192,17 +186,34 @@ struct kvm_pgtable { * children. * @KVM_PGTABLE_WALK_TABLE_POST: Visit table entries after their * children. + * @KVM_PGTABLE_WALK_SHARED: Indicates the page-tables may be shared + * with other software walkers. */ enum kvm_pgtable_walk_flags { KVM_PGTABLE_WALK_LEAF = BIT(0), KVM_PGTABLE_WALK_TABLE_PRE = BIT(1), KVM_PGTABLE_WALK_TABLE_POST = BIT(2), + KVM_PGTABLE_WALK_SHARED = BIT(3), +}; + +struct kvm_pgtable_visit_ctx { + kvm_pte_t *ptep; + kvm_pte_t old; + void *arg; + struct kvm_pgtable_mm_ops *mm_ops; + u64 addr; + u64 end; + u32 level; + enum kvm_pgtable_walk_flags flags; }; -typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg); +typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit); + +static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx) +{ + return ctx->flags & KVM_PGTABLE_WALK_SHARED; +} /** * struct kvm_pgtable_walker - Hook into a page-table walk. @@ -217,6 +228,94 @@ struct kvm_pgtable_walker { const enum kvm_pgtable_walk_flags flags; }; +/* + * RCU cannot be used in a non-kernel context such as the hyp. As such, page + * table walkers used in hyp do not call into RCU and instead use other + * synchronization mechanisms (such as a spinlock). + */ +#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__) + +typedef kvm_pte_t *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return pteref; +} + +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + /* + * Due to the lack of RCU (or a similar protection scheme), only + * non-shared table walkers are allowed in the hypervisor. + */ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + return -EPERM; + + return 0; +} + +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return true; +} + +#else + +typedef kvm_pte_t __rcu *kvm_pteref_t; + +static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker, + kvm_pteref_t pteref) +{ + return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED)); +} + +static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) +{ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_lock(); + + return 0; +} + +static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) +{ + if (walker->flags & KVM_PGTABLE_WALK_SHARED) + rcu_read_unlock(); +} + +static inline bool kvm_pgtable_walk_lock_held(void) +{ + return rcu_read_lock_held(); +} + +#endif + +/** + * struct kvm_pgtable - KVM page-table. + * @ia_bits: Maximum input address size, in bits. + * @start_level: Level at which the page-table walk starts. + * @pgd: Pointer to the first top-level entry of the page-table. + * @mm_ops: Memory management callbacks. + * @mmu: Stage-2 KVM MMU struct. Unused for stage-1 page-tables. + * @flags: Stage-2 page-table flags. + * @force_pte_cb: Function that returns true if page level mappings must + * be used instead of block mappings. + */ +struct kvm_pgtable { + u32 ia_bits; + u32 start_level; + kvm_pteref_t pgd; + struct kvm_pgtable_mm_ops *mm_ops; + + /* Stage-2 only */ + struct kvm_s2_mmu *mmu; + enum kvm_pgtable_stage2_flags flags; + kvm_pgtable_force_pte_cb_t force_pte_cb; +}; + /** * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table. * @pgt: Uninitialised page-table structure to initialise. @@ -296,6 +395,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); */ u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift); +/** + * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD + * @vtcr: Content of the VTCR register. + * + * Return: the size (in bytes) of the stage-2 PGD + */ +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr); + /** * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table. * @pgt: Uninitialised page-table structure to initialise. @@ -324,6 +431,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, */ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); +/** + * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure. + * @mm_ops: Memory management callbacks. + * @pgtable: Unlinked stage-2 paging structure to be freed. + * @level: Level of the stage-2 paging structure to be freed. + * + * The page-table is assumed to be unreachable by any hardware walkers prior to + * freeing and therefore no TLB invalidation is performed. + */ +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level); + /** * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table. * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*(). @@ -333,6 +451,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); * @prot: Permissions and attributes for the mapping. * @mc: Cache of pre-allocated and zeroed memory from which to allocate * page-table pages. + * @flags: Flags to control the page-table walk (ex. a shared walk) * * The offset of @addr within a page is ignored, @size is rounded-up to * the next page boundary and @phys is rounded-down to the previous page @@ -354,7 +473,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt); */ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc); + void *mc, enum kvm_pgtable_walk_flags flags); /** * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 9f4ad2a8df59c046e267e0b6ff0c3f788d11977b..01129b0d4c689e46c149b6d91dd5b3bce99f64d6 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -9,11 +9,49 @@ #include #include +/* Maximum number of VMs that can co-exist under pKVM. */ +#define KVM_MAX_PVMS 255 + #define HYP_MEMBLOCK_REGIONS 128 +int pkvm_init_host_vm(struct kvm *kvm); +int pkvm_create_hyp_vm(struct kvm *kvm); +void pkvm_destroy_hyp_vm(struct kvm *kvm); + extern struct memblock_region kvm_nvhe_sym(hyp_memory)[]; extern unsigned int kvm_nvhe_sym(hyp_memblock_nr); +static inline unsigned long +hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size) +{ + unsigned long nr_pages = reg->size >> PAGE_SHIFT; + unsigned long start, end; + + start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size; + end = start + nr_pages * vmemmap_entry_size; + start = ALIGN_DOWN(start, PAGE_SIZE); + end = ALIGN(end, PAGE_SIZE); + + return end - start; +} + +static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size) +{ + unsigned long res = 0, i; + + for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { + res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i], + vmemmap_entry_size); + } + + return res >> PAGE_SHIFT; +} + +static inline unsigned long hyp_vm_table_pages(void) +{ + return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT; +} + static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) { unsigned long total = 0, i; diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 760c62f8e22f84f95c48de4f1470d9cea67ca536..20dd06d70af5f39c05497d8b138fb15e1323851b 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from, unsigned long n); int mte_save_tags(struct page *page); void mte_save_page_tags(const void *page_addr, void *tag_storage); -bool mte_restore_tags(swp_entry_t entry, struct page *page); +void mte_restore_tags(swp_entry_t entry, struct page *page); void mte_restore_page_tags(void *page_addr, const void *tag_storage); void mte_invalidate_tags(int type, pgoff_t offset); void mte_invalidate_tags_area(int type); @@ -36,6 +36,58 @@ void mte_free_tag_storage(char *storage); /* track which pages have valid allocation tags */ #define PG_mte_tagged PG_arch_2 +/* simple lock to avoid multiple threads tagging the same page */ +#define PG_mte_lock PG_arch_3 + +static inline void set_page_mte_tagged(struct page *page) +{ + /* + * Ensure that the tags written prior to this function are visible + * before the page flags update. + */ + smp_wmb(); + set_bit(PG_mte_tagged, &page->flags); +} + +static inline bool page_mte_tagged(struct page *page) +{ + bool ret = test_bit(PG_mte_tagged, &page->flags); + + /* + * If the page is tagged, ensure ordering with a likely subsequent + * read of the tags. + */ + if (ret) + smp_rmb(); + return ret; +} + +/* + * Lock the page for tagging and return 'true' if the page can be tagged, + * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the + * locking only happens once for page initialisation. + * + * The page MTE lock state: + * + * Locked: PG_mte_lock && !PG_mte_tagged + * Unlocked: !PG_mte_lock || PG_mte_tagged + * + * Acquire semantics only if the page is tagged (returning 'false'). + */ +static inline bool try_page_mte_tagging(struct page *page) +{ + if (!test_and_set_bit(PG_mte_lock, &page->flags)) + return true; + + /* + * The tags are either being initialised or may have been initialised + * already. Check if the PG_mte_tagged flag has been set or wait + * otherwise. + */ + smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged)); + + return false; +} void mte_zero_clear_page_tags(void *addr); void mte_sync_tags(pte_t old_pte, pte_t pte); @@ -56,6 +108,17 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size); /* unused if !CONFIG_ARM64_MTE, silence the compiler */ #define PG_mte_tagged 0 +static inline void set_page_mte_tagged(struct page *page) +{ +} +static inline bool page_mte_tagged(struct page *page) +{ + return false; +} +static inline bool try_page_mte_tagging(struct page *page) +{ + return false; +} static inline void mte_zero_clear_page_tags(void *addr) { } diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b3faf7582a53fddbac2ab078731613a151bb9af3..6914add66bcf8857633cea33e1cbc905ba184475 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -1046,8 +1046,8 @@ static inline void arch_swap_invalidate_area(int type) #define __HAVE_ARCH_SWAP_RESTORE static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) { - if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) - set_bit(PG_mte_tagged, &folio->flags); + if (system_supports_mte()) + mte_restore_tags(entry, &folio->page); } #endif /* CONFIG_ARM64_MTE */ diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 316917b9870704de245f002cb4261c46f8a2fea4..a7a857f1784d80d6264eeebecbeb371c7d13ba5b 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -43,6 +43,7 @@ #define __KVM_HAVE_VCPU_EVENTS #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_DIRTY_LOG_PAGE_OFFSET 64 #define KVM_REG_SIZE(id) \ (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 3e4075c4750a30f080f9ef6ac2c9347371d5fcf7..0ff7cd74d3016361a202efce63a5e2024c35087c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -2077,8 +2077,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) * Clear the tags in the zero page. This needs to be done via the * linear map which has the Tagged attribute. */ - if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags)) + if (try_page_mte_tagging(ZERO_PAGE(0))) { mte_clear_page_tags(lm_alias(empty_zero_page)); + set_page_mte_tagged(ZERO_PAGE(0)); + } kasan_init_hw_tags_cpu(); } diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c index 27ef7ad3ffd2e27a5c6fdc1e1a63c179df2e56e2..353009d7f307f40e982adf627a3ad9e408c80a50 100644 --- a/arch/arm64/kernel/elfcore.c +++ b/arch/arm64/kernel/elfcore.c @@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm, * Pages mapped in user space as !pte_access_permitted() (e.g. * PROT_EXEC only) may not have the PG_mte_tagged flag set. */ - if (!test_bit(PG_mte_tagged, &page->flags)) { + if (!page_mte_tagged(page)) { put_page(page); dump_skip(cprm, MTE_PAGE_TAG_STORAGE); continue; diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index af5df48ba915b9274211f66c33f4e61fcbe89a1e..788597a6b6a2f01ddfa7eae4c67802b3d50d359e 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void) if (!page) continue; - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) continue; ret = save_tags(page, pfn); diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index f31130ba02331060fb394fb27e5d9b2cd75cc60e..d0e9bb5c91fccad6c22c16d7ab3e24559a85b2b1 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -63,12 +63,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler); /* Vectors installed by hyp-init on reset HVC. */ KVM_NVHE_ALIAS(__hyp_stub_vectors); -/* Kernel symbol used by icache_is_vpipt(). */ -KVM_NVHE_ALIAS(__icache_flags); - -/* VMID bits set by the KVM VMID allocator */ -KVM_NVHE_ALIAS(kvm_arm_vmid_bits); - /* Static keys which are set if a vGIC trap should be handled in hyp. */ KVM_NVHE_ALIAS(vgic_v2_cpuif_trap); KVM_NVHE_ALIAS(vgic_v3_cpuif_trap); @@ -84,9 +78,6 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities); KVM_NVHE_ALIAS(__start___kvm_ex_table); KVM_NVHE_ALIAS(__stop___kvm_ex_table); -/* Array containing bases of nVHE per-CPU memory regions. */ -KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); - /* PMU available static key */ #ifdef CONFIG_HW_PERF_EVENTS KVM_NVHE_ALIAS(kvm_arm_pmu_available); @@ -103,12 +94,6 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy); KVM_NVHE_ALIAS_HYP(__memset, __pi_memset); #endif -/* Kernel memory sections */ -KVM_NVHE_ALIAS(__start_rodata); -KVM_NVHE_ALIAS(__end_rodata); -KVM_NVHE_ALIAS(__bss_start); -KVM_NVHE_ALIAS(__bss_stop); - /* Hyp memory sections */ KVM_NVHE_ALIAS(__hyp_idmap_text_start); KVM_NVHE_ALIAS(__hyp_idmap_text_end); diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index 7467217c1eaf372d19a0eed8398e2e89e04f44c7..f5bcb0dc6267385e3401ed2735429b45af56e8e7 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -41,19 +41,17 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, if (check_swap && is_swap_pte(old_pte)) { swp_entry_t entry = pte_to_swp_entry(old_pte); - if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) - return; + if (!non_swap_entry(entry)) + mte_restore_tags(entry, page); } if (!pte_is_tagged) return; - /* - * Test PG_mte_tagged again in case it was racing with another - * set_pte_at(). - */ - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + if (try_page_mte_tagging(page)) { mte_clear_page_tags(page_address(page)); + set_page_mte_tagged(page); + } } void mte_sync_tags(pte_t old_pte, pte_t pte) @@ -69,9 +67,11 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) /* if PG_mte_tagged is set, tags have already been initialised */ for (i = 0; i < nr_pages; i++, page++) { - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) { mte_sync_page_tags(page, old_pte, check_swap, pte_is_tagged); + set_page_mte_tagged(page); + } } /* ensure the tags are visible before the PTE is set */ @@ -96,8 +96,7 @@ int memcmp_pages(struct page *page1, struct page *page2) * pages is tagged, set_pte_at() may zero or change the tags of the * other page via mte_sync_tags(). */ - if (test_bit(PG_mte_tagged, &page1->flags) || - test_bit(PG_mte_tagged, &page2->flags)) + if (page_mte_tagged(page1) || page_mte_tagged(page2)) return addr1 != addr2; return ret; @@ -454,7 +453,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, put_page(page); break; } - WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags)); + WARN_ON_ONCE(!page_mte_tagged(page)); /* limit access to the end of the page */ offset = offset_in_page(addr); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 619e2dc7ee14cce1b906c1962c5429194260ad80..beaf9586338f5c10569764a9728209289a06756c 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -27,7 +27,7 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv \ -Bsymbolic --build-id=sha1 -n $(btildflags-y) ifdef CONFIG_LD_ORPHAN_WARN - ldflags-y += --orphan-handling=warn + ldflags-y += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif ldflags-y += -T diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile index 36c8f66cad251ac3711f73f5b94d353311bde0b7..f59bd1a4ead6b46d31f0a7a6e3d5bf08212b1891 100644 --- a/arch/arm64/kernel/vdso32/Makefile +++ b/arch/arm64/kernel/vdso32/Makefile @@ -104,7 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__ VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096 VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1 -VDSO_LDFLAGS += --orphan-handling=warn +VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) # Borrow vdsomunge.c from the arm vDSO diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 815cc118c675f3e0fc96e58df229ffe3f7ce928b..05da3c8f7e88f6a640ef744d8c401e695e490553 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -32,6 +32,8 @@ menuconfig KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select HAVE_KVM_DIRTY_RING_ACQ_REL + select NEED_KVM_DIRTY_RING_WITH_BITMAP select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 94d33e296e10c57c34008af96d7a9adcb72d99ef..9c5573bc4614569ac62ea80457d13173f3db11fa 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -50,7 +51,6 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized); DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); -unsigned long kvm_arm_hyp_percpu_base[NR_CPUS]; DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); static bool vgic_present; @@ -138,24 +138,24 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { int ret; - ret = kvm_arm_setup_stage2(kvm, type); - if (ret) - return ret; - - ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu); + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) return ret; - ret = kvm_share_hyp(kvm, kvm + 1); + ret = pkvm_init_host_vm(kvm); if (ret) - goto out_free_stage2_pgd; + goto err_unshare_kvm; if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) { ret = -ENOMEM; - goto out_free_stage2_pgd; + goto err_unshare_kvm; } cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask); + ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type); + if (ret) + goto err_free_cpumask; + kvm_vgic_early_init(kvm); /* The maximum number of VCPUs is limited by the host's GIC model */ @@ -164,9 +164,18 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) set_default_spectre(kvm); kvm_arm_init_hypercalls(kvm); - return ret; -out_free_stage2_pgd: - kvm_free_stage2_pgd(&kvm->arch.mmu); + /* + * Initialise the default PMUver before there is a chance to + * create an actual PMU. + */ + kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit(); + + return 0; + +err_free_cpumask: + free_cpumask_var(kvm->arch.supported_cpus); +err_unshare_kvm: + kvm_unshare_hyp(kvm, kvm + 1); return ret; } @@ -187,6 +196,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_vgic_destroy(kvm); + if (is_protected_kvm_enabled()) + pkvm_destroy_hyp_vm(kvm); + kvm_destroy_vcpus(kvm); kvm_unshare_hyp(kvm, kvm + 1); @@ -569,6 +581,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu) if (ret) return ret; + if (is_protected_kvm_enabled()) { + ret = pkvm_create_hyp_vm(kvm); + if (ret) + return ret; + } + if (!irqchip_in_kernel(kvm)) { /* * Tell the rest of the code that there are userspace irqchip @@ -746,6 +764,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SUSPEND, vcpu)) return kvm_vcpu_suspend(vcpu); + + if (kvm_dirty_ring_check_request(vcpu)) + return 0; } return 1; @@ -1518,7 +1539,7 @@ static int kvm_init_vector_slots(void) return 0; } -static void cpu_prepare_hyp_mode(int cpu) +static void cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits) { struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu); unsigned long tcr; @@ -1534,23 +1555,9 @@ static void cpu_prepare_hyp_mode(int cpu) params->mair_el2 = read_sysreg(mair_el1); - /* - * The ID map may be configured to use an extended virtual address - * range. This is only the case if system RAM is out of range for the - * currently configured page size and VA_BITS, in which case we will - * also need the extended virtual range for the HYP ID map, or we won't - * be able to enable the EL2 MMU. - * - * However, at EL2, there is only one TTBR register, and we can't switch - * between translation tables *and* update TCR_EL2.T0SZ at the same - * time. Bottom line: we need to use the extended range with *both* our - * translation tables. - * - * So use the same T0SZ value we use for the ID map. - */ tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1; tcr &= ~TCR_T0SZ_MASK; - tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET; + tcr |= TCR_T0SZ(hyp_va_bits); params->tcr_el2 = tcr; params->pgd_pa = kvm_mmu_get_httbr(); @@ -1844,13 +1851,13 @@ static void teardown_hyp_mode(void) free_hyp_pgds(); for_each_possible_cpu(cpu) { free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); - free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order()); + free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order()); } } static int do_pkvm_init(u32 hyp_va_bits) { - void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base); + void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)); int ret; preempt_disable(); @@ -1870,11 +1877,8 @@ static int do_pkvm_init(u32 hyp_va_bits) return ret; } -static int kvm_hyp_init_protection(u32 hyp_va_bits) +static void kvm_hyp_init_symbols(void) { - void *addr = phys_to_virt(hyp_mem_base); - int ret; - kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); @@ -1883,6 +1887,14 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); + kvm_nvhe_sym(__icache_flags) = __icache_flags; + kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits; +} + +static int kvm_hyp_init_protection(u32 hyp_va_bits) +{ + void *addr = phys_to_virt(hyp_mem_base); + int ret; ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); if (ret) @@ -1950,7 +1962,7 @@ static int init_hyp_mode(void) page_addr = page_address(page); memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size()); - kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr; + kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr; } /* @@ -2043,7 +2055,7 @@ static int init_hyp_mode(void) } for_each_possible_cpu(cpu) { - char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu]; + char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu]; char *percpu_end = percpu_begin + nvhe_percpu_size(); /* Map Hyp percpu pages */ @@ -2054,9 +2066,11 @@ static int init_hyp_mode(void) } /* Prepare the CPU initialization parameters */ - cpu_prepare_hyp_mode(cpu); + cpu_prepare_hyp_mode(cpu, hyp_va_bits); } + kvm_hyp_init_symbols(); + if (is_protected_kvm_enabled()) { init_cpu_logical_map(); @@ -2064,9 +2078,7 @@ static int init_hyp_mode(void) err = -ENODEV; goto out_err; } - } - if (is_protected_kvm_enabled()) { err = kvm_hyp_init_protection(hyp_va_bits); if (err) { kvm_err("Failed to init hyp memory protection\n"); @@ -2130,6 +2142,11 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) return NULL; } +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm); +} + bool kvm_arch_has_irq_bypass(void) { return true; diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 2ff13a3f847966c2268cb21c91045e1609869265..5626ddb540ce3333d561f9e61c13d3a695f859fe 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -1059,7 +1059,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, maddr = page_address(page); if (!write) { - if (test_bit(PG_mte_tagged, &page->flags)) + if (page_mte_tagged(page)) num_tags = mte_copy_tags_to_user(tags, maddr, MTE_GRANULES_PER_PAGE); else @@ -1068,15 +1068,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, clear_user(tags, MTE_GRANULES_PER_PAGE); kvm_release_pfn_clean(pfn); } else { + /* + * Only locking to serialise with a concurrent + * set_pte_at() in the VMM but still overriding the + * tags, hence ignoring the return value. + */ + try_page_mte_tagging(page); num_tags = mte_copy_tags_from_user(maddr, tags, MTE_GRANULES_PER_PAGE); - /* - * Set the flag after checking the write - * completed fully - */ - if (num_tags == MTE_GRANULES_PER_PAGE) - set_bit(PG_mte_tagged, &page->flags); + /* uaccess failed, don't leave stale tags */ + if (num_tags != MTE_GRANULES_PER_PAGE) + mte_clear_page_tags(page); + set_page_mte_tagged(page); kvm_release_pfn_dirty(pfn); } diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c index b3742a6691e87450865eefb61f0018ee1db17158..b257a3b4bfc5c6c7c9fdcba16b1d7c09bc4d76fd 100644 --- a/arch/arm64/kvm/hyp/hyp-constants.c +++ b/arch/arm64/kvm/hyp/hyp-constants.c @@ -2,9 +2,12 @@ #include #include +#include int main(void) { DEFINE(STRUCT_HYP_PAGE_SIZE, sizeof(struct hyp_page)); + DEFINE(PKVM_HYP_VM_SIZE, sizeof(struct pkvm_hyp_vm)); + DEFINE(PKVM_HYP_VCPU_SIZE, sizeof(struct pkvm_hyp_vcpu)); return 0; } diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index 80e99836eac792427fd62e1c2a5be1b762ab5b26..b7bdbe63deed876f238f97cfd1f03f98497d8aaa 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -8,8 +8,10 @@ #define __KVM_NVHE_MEM_PROTECT__ #include #include +#include #include #include +#include #include /* @@ -43,30 +45,45 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot) return prot & PKVM_PAGE_STATE_PROT_MASK; } -struct host_kvm { +struct host_mmu { struct kvm_arch arch; struct kvm_pgtable pgt; struct kvm_pgtable_mm_ops mm_ops; hyp_spinlock_t lock; }; -extern struct host_kvm host_kvm; +extern struct host_mmu host_mmu; -extern const u8 pkvm_hyp_id; +/* This corresponds to page-table locking order */ +enum pkvm_component_id { + PKVM_ID_HOST, + PKVM_ID_HYP, +}; + +extern unsigned long hyp_nr_cpus; int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); int __pkvm_host_unshare_hyp(u64 pfn); +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages); +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id); int kvm_host_prepare_stage2(void *pgt_pool_base); +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd); void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt); +int hyp_pin_shared_mem(void *from, void *to); +void hyp_unpin_shared_mem(void *from, void *to); +void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc); +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc); + static __always_inline void __load_host_stage2(void) { if (static_branch_likely(&kvm_protected_mode_initialized)) - __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); + __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); else write_sysreg(0, vttbr_el2); } diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h index 592b7edb3edb4cdcb3a43855b3edf3a53c37e346..ab205c4d67748f688c95ed63d00c18b1eca1c7ef 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/memory.h +++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h @@ -38,6 +38,10 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr) #define hyp_page_to_virt(page) __hyp_va(hyp_page_to_phys(page)) #define hyp_page_to_pool(page) (((struct hyp_page *)page)->pool) +/* + * Refcounting for 'struct hyp_page'. + * hyp_pool::lock must be held if atomic access to the refcount is required. + */ static inline int hyp_page_count(void *addr) { struct hyp_page *p = hyp_virt_to_page(addr); @@ -45,4 +49,27 @@ static inline int hyp_page_count(void *addr) return p->refcount; } +static inline void hyp_page_ref_inc(struct hyp_page *p) +{ + BUG_ON(p->refcount == USHRT_MAX); + p->refcount++; +} + +static inline void hyp_page_ref_dec(struct hyp_page *p) +{ + BUG_ON(!p->refcount); + p->refcount--; +} + +static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) +{ + hyp_page_ref_dec(p); + return (p->refcount == 0); +} + +static inline void hyp_set_page_refcounted(struct hyp_page *p) +{ + BUG_ON(p->refcount); + p->refcount = 1; +} #endif /* __KVM_HYP_MEMORY_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h index 42d8eb9bfe725b49c808b22e4333d69bb7a47ece..d5ec972b5c1ec8a3506d4e5851d249e7f8ea3b8b 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mm.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h @@ -13,9 +13,13 @@ extern struct kvm_pgtable pkvm_pgtable; extern hyp_spinlock_t pkvm_pgd_lock; +int hyp_create_pcpu_fixmap(void); +void *hyp_fixmap_map(phys_addr_t phys); +void hyp_fixmap_unmap(void); + int hyp_create_idmap(u32 hyp_va_bits); int hyp_map_vectors(void); -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back); +int hyp_back_vmemmap(phys_addr_t back); int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot); int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot); @@ -24,16 +28,4 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size, unsigned long *haddr); int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr); -static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size, - unsigned long *start, unsigned long *end) -{ - unsigned long nr_pages = size >> PAGE_SHIFT; - struct hyp_page *p = hyp_phys_to_page(phys); - - *start = (unsigned long)p; - *end = *start + nr_pages * sizeof(struct hyp_page); - *start = ALIGN_DOWN(*start, PAGE_SIZE); - *end = ALIGN(*end, PAGE_SIZE); -} - #endif /* __KVM_HYP_MM_H */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h new file mode 100644 index 0000000000000000000000000000000000000000..82b3d62538a6111cc3c3d6d65d99d5e613d14fae --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h @@ -0,0 +1,68 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_NVHE_PKVM_H__ +#define __ARM64_KVM_NVHE_PKVM_H__ + +#include + +#include +#include + +/* + * Holds the relevant data for maintaining the vcpu state completely at hyp. + */ +struct pkvm_hyp_vcpu { + struct kvm_vcpu vcpu; + + /* Backpointer to the host's (untrusted) vCPU instance. */ + struct kvm_vcpu *host_vcpu; +}; + +/* + * Holds the relevant data for running a protected vm. + */ +struct pkvm_hyp_vm { + struct kvm kvm; + + /* Backpointer to the host's (untrusted) KVM instance. */ + struct kvm *host_kvm; + + /* The guest's stage-2 page-table managed by the hypervisor. */ + struct kvm_pgtable pgt; + struct kvm_pgtable_mm_ops mm_ops; + struct hyp_pool pool; + hyp_spinlock_t lock; + + /* + * The number of vcpus initialized and ready to run. + * Modifying this is protected by 'vm_table_lock'. + */ + unsigned int nr_vcpus; + + /* Array of the hyp vCPU structures for this VM. */ + struct pkvm_hyp_vcpu *vcpus[]; +}; + +static inline struct pkvm_hyp_vm * +pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm); +} + +void pkvm_hyp_vm_table_init(void *tbl); + +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva); +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva); +int __pkvm_teardown_vm(pkvm_handle_t handle); + +struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, + unsigned int vcpu_idx); +void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu); + +#endif /* __ARM64_KVM_NVHE_PKVM_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h index 4652fd04bdbee5b94f4922f010585bc4fa6a65e6..7c7ea8c55405f589f741a414f71d486120710092 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h +++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h @@ -28,9 +28,17 @@ typedef union hyp_spinlock { }; } hyp_spinlock_t; +#define __HYP_SPIN_LOCK_INITIALIZER \ + { .__val = 0 } + +#define __HYP_SPIN_LOCK_UNLOCKED \ + ((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER) + +#define DEFINE_HYP_SPINLOCK(x) hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED + #define hyp_spin_lock_init(l) \ do { \ - *(l) = (hyp_spinlock_t){ .__val = 0 }; \ + *(l) = __HYP_SPIN_LOCK_UNLOCKED; \ } while (0) static inline void hyp_spin_lock(hyp_spinlock_t *lock) diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 0c367eb5f4e28dce0dcd0986c10413b5361f5faf..85936c17ae4076df6a03b87efddb86f2bec9da98 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -12,3 +12,14 @@ SYM_FUNC_START(__pi_dcache_clean_inval_poc) ret SYM_FUNC_END(__pi_dcache_clean_inval_poc) SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) + +SYM_FUNC_START(__pi_icache_inval_pou) +alternative_if ARM64_HAS_CACHE_DIC + isb + ret +alternative_else_nop_endif + + invalidate_icache_by_line x0, x1, x2, x3 + ret +SYM_FUNC_END(__pi_icache_inval_pou) +SYM_FUNC_ALIAS(icache_inval_pou, __pi_icache_inval_pou) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 3cea4b6ac23ec114c037348fee1363ad25edeff6..728e01d4536b02d519a591c7aecbc61a75642d47 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -15,17 +15,93 @@ #include #include +#include #include DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params); void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt); +static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + + hyp_vcpu->vcpu.arch.ctxt = host_vcpu->arch.ctxt; + + hyp_vcpu->vcpu.arch.sve_state = kern_hyp_va(host_vcpu->arch.sve_state); + hyp_vcpu->vcpu.arch.sve_max_vl = host_vcpu->arch.sve_max_vl; + + hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu; + + hyp_vcpu->vcpu.arch.hcr_el2 = host_vcpu->arch.hcr_el2; + hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2; + hyp_vcpu->vcpu.arch.cptr_el2 = host_vcpu->arch.cptr_el2; + + hyp_vcpu->vcpu.arch.iflags = host_vcpu->arch.iflags; + hyp_vcpu->vcpu.arch.fp_state = host_vcpu->arch.fp_state; + + hyp_vcpu->vcpu.arch.debug_ptr = kern_hyp_va(host_vcpu->arch.debug_ptr); + hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state; + + hyp_vcpu->vcpu.arch.vsesr_el2 = host_vcpu->arch.vsesr_el2; + + hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3; +} + +static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu; + struct vgic_v3_cpu_if *hyp_cpu_if = &hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3; + struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3; + unsigned int i; + + host_vcpu->arch.ctxt = hyp_vcpu->vcpu.arch.ctxt; + + host_vcpu->arch.hcr_el2 = hyp_vcpu->vcpu.arch.hcr_el2; + host_vcpu->arch.cptr_el2 = hyp_vcpu->vcpu.arch.cptr_el2; + + host_vcpu->arch.fault = hyp_vcpu->vcpu.arch.fault; + + host_vcpu->arch.iflags = hyp_vcpu->vcpu.arch.iflags; + host_vcpu->arch.fp_state = hyp_vcpu->vcpu.arch.fp_state; + + host_cpu_if->vgic_hcr = hyp_cpu_if->vgic_hcr; + for (i = 0; i < hyp_cpu_if->used_lrs; ++i) + host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i]; +} + static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt) { - DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1); + int ret; - cpu_reg(host_ctxt, 1) = __kvm_vcpu_run(kern_hyp_va(vcpu)); + host_vcpu = kern_hyp_va(host_vcpu); + + if (unlikely(is_protected_kvm_enabled())) { + struct pkvm_hyp_vcpu *hyp_vcpu; + struct kvm *host_kvm; + + host_kvm = kern_hyp_va(host_vcpu->kvm); + hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle, + host_vcpu->vcpu_idx); + if (!hyp_vcpu) { + ret = -EINVAL; + goto out; + } + + flush_hyp_vcpu(hyp_vcpu); + + ret = __kvm_vcpu_run(&hyp_vcpu->vcpu); + + sync_hyp_vcpu(hyp_vcpu); + pkvm_put_hyp_vcpu(hyp_vcpu); + } else { + /* The host is fully trusted, run its vCPU directly. */ + ret = __kvm_vcpu_run(host_vcpu); + } + +out: + cpu_reg(host_ctxt, 1) = ret; } static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt) @@ -191,6 +267,33 @@ static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt) __pkvm_vcpu_init_traps(kern_hyp_va(vcpu)); } +static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1); + DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2); + DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3); + + host_kvm = kern_hyp_va(host_kvm); + cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva); +} + +static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2); + DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3); + + host_vcpu = kern_hyp_va(host_vcpu); + cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva); +} + +static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -220,6 +323,9 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_aprs), HANDLE_FUNC(__pkvm_vcpu_init_traps), + HANDLE_FUNC(__pkvm_init_vm), + HANDLE_FUNC(__pkvm_init_vcpu), + HANDLE_FUNC(__pkvm_teardown_vm), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c index 9f54833af400972670ceddec0a7904d190a005e5..04d194583f1e47a42079726276bc941cdc146641 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c @@ -23,6 +23,8 @@ u64 cpu_logical_map(unsigned int cpu) return hyp_cpu_logical_map[cpu]; } +unsigned long __ro_after_init kvm_arm_hyp_percpu_base[NR_CPUS]; + unsigned long __hyp_per_cpu_offset(unsigned int cpu) { unsigned long *cpu_base_array; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 07f9dc9848ef114849103110ee97aef91e95d5b6..552653fa18be34b2cde379d04ad6acb27395e3c8 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -21,21 +21,33 @@ #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP) -extern unsigned long hyp_nr_cpus; -struct host_kvm host_kvm; +struct host_mmu host_mmu; static struct hyp_pool host_s2_pool; -const u8 pkvm_hyp_id = 1; +static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm); +#define current_vm (*this_cpu_ptr(&__current_vm)) + +static void guest_lock_component(struct pkvm_hyp_vm *vm) +{ + hyp_spin_lock(&vm->lock); + current_vm = vm; +} + +static void guest_unlock_component(struct pkvm_hyp_vm *vm) +{ + current_vm = NULL; + hyp_spin_unlock(&vm->lock); +} static void host_lock_component(void) { - hyp_spin_lock(&host_kvm.lock); + hyp_spin_lock(&host_mmu.lock); } static void host_unlock_component(void) { - hyp_spin_unlock(&host_kvm.lock); + hyp_spin_unlock(&host_mmu.lock); } static void hyp_lock_component(void) @@ -79,6 +91,11 @@ static void host_s2_put_page(void *addr) hyp_put_page(&host_s2_pool, addr); } +static void host_s2_free_removed_table(void *addr, u32 level) +{ + kvm_pgtable_stage2_free_removed(&host_mmu.mm_ops, addr, level); +} + static int prepare_s2_pool(void *pgt_pool_base) { unsigned long nr_pages, pfn; @@ -90,9 +107,10 @@ static int prepare_s2_pool(void *pgt_pool_base) if (ret) return ret; - host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) { + host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_pages_exact = host_s2_zalloc_pages_exact, .zalloc_page = host_s2_zalloc_page, + .free_removed_table = host_s2_free_removed_table, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .page_count = hyp_page_count, @@ -111,7 +129,7 @@ static void prepare_host_vtcr(void) parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val); phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange); - host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, + host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val, id_aa64mmfr1_el1_sys_val, phys_shift); } @@ -119,45 +137,170 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr int kvm_host_prepare_stage2(void *pgt_pool_base) { - struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; int ret; prepare_host_vtcr(); - hyp_spin_lock_init(&host_kvm.lock); - mmu->arch = &host_kvm.arch; + hyp_spin_lock_init(&host_mmu.lock); + mmu->arch = &host_mmu.arch; ret = prepare_s2_pool(pgt_pool_base); if (ret) return ret; - ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu, - &host_kvm.mm_ops, KVM_HOST_S2_FLAGS, + ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu, + &host_mmu.mm_ops, KVM_HOST_S2_FLAGS, host_stage2_force_pte_cb); if (ret) return ret; - mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd); - mmu->pgt = &host_kvm.pgt; + mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd); + mmu->pgt = &host_mmu.pgt; atomic64_set(&mmu->vmid.id, 0); return 0; } +static bool guest_stage2_force_pte_cb(u64 addr, u64 end, + enum kvm_pgtable_prot prot) +{ + return true; +} + +static void *guest_s2_zalloc_pages_exact(size_t size) +{ + void *addr = hyp_alloc_pages(¤t_vm->pool, get_order(size)); + + WARN_ON(size != (PAGE_SIZE << get_order(size))); + hyp_split_page(hyp_virt_to_page(addr)); + + return addr; +} + +static void guest_s2_free_pages_exact(void *addr, unsigned long size) +{ + u8 order = get_order(size); + unsigned int i; + + for (i = 0; i < (1 << order); i++) + hyp_put_page(¤t_vm->pool, addr + (i * PAGE_SIZE)); +} + +static void *guest_s2_zalloc_page(void *mc) +{ + struct hyp_page *p; + void *addr; + + addr = hyp_alloc_pages(¤t_vm->pool, 0); + if (addr) + return addr; + + addr = pop_hyp_memcache(mc, hyp_phys_to_virt); + if (!addr) + return addr; + + memset(addr, 0, PAGE_SIZE); + p = hyp_virt_to_page(addr); + memset(p, 0, sizeof(*p)); + p->refcount = 1; + + return addr; +} + +static void guest_s2_get_page(void *addr) +{ + hyp_get_page(¤t_vm->pool, addr); +} + +static void guest_s2_put_page(void *addr) +{ + hyp_put_page(¤t_vm->pool, addr); +} + +static void clean_dcache_guest_page(void *va, size_t size) +{ + __clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +static void invalidate_icache_guest_page(void *va, size_t size) +{ + __invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size); + hyp_fixmap_unmap(); +} + +int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd) +{ + struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu; + unsigned long nr_pages; + int ret; + + nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT; + ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0); + if (ret) + return ret; + + hyp_spin_lock_init(&vm->lock); + vm->mm_ops = (struct kvm_pgtable_mm_ops) { + .zalloc_pages_exact = guest_s2_zalloc_pages_exact, + .free_pages_exact = guest_s2_free_pages_exact, + .zalloc_page = guest_s2_zalloc_page, + .phys_to_virt = hyp_phys_to_virt, + .virt_to_phys = hyp_virt_to_phys, + .page_count = hyp_page_count, + .get_page = guest_s2_get_page, + .put_page = guest_s2_put_page, + .dcache_clean_inval_poc = clean_dcache_guest_page, + .icache_inval_pou = invalidate_icache_guest_page, + }; + + guest_lock_component(vm); + ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0, + guest_stage2_force_pte_cb); + guest_unlock_component(vm); + if (ret) + return ret; + + vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd); + + return 0; +} + +void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc) +{ + void *addr; + + /* Dump all pgtable pages in the hyp_pool */ + guest_lock_component(vm); + kvm_pgtable_stage2_destroy(&vm->pgt); + vm->kvm.arch.mmu.pgd_phys = 0ULL; + guest_unlock_component(vm); + + /* Drain the hyp_pool into the memcache */ + addr = hyp_alloc_pages(&vm->pool, 0); + while (addr) { + memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page)); + push_hyp_memcache(mc, addr, hyp_virt_to_phys); + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1)); + addr = hyp_alloc_pages(&vm->pool, 0); + } +} + int __pkvm_prot_finalize(void) { - struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; + struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu; struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); if (params->hcr_el2 & HCR_VM) return -EPERM; params->vttbr = kvm_get_vttbr(mmu); - params->vtcr = host_kvm.arch.vtcr; + params->vtcr = host_mmu.arch.vtcr; params->hcr_el2 |= HCR_VM; kvm_flush_dcache_to_poc(params, sizeof(*params)); write_sysreg(params->hcr_el2, hcr_el2); - __load_stage2(&host_kvm.arch.mmu, &host_kvm.arch); + __load_stage2(&host_mmu.arch.mmu, &host_mmu.arch); /* * Make sure to have an ISB before the TLB maintenance below but only @@ -175,7 +318,7 @@ int __pkvm_prot_finalize(void) static int host_stage2_unmap_dev_all(void) { - struct kvm_pgtable *pgt = &host_kvm.pgt; + struct kvm_pgtable *pgt = &host_mmu.pgt; struct memblock_region *reg; u64 addr = 0; int i, ret; @@ -195,7 +338,7 @@ struct kvm_mem_range { u64 end; }; -static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) +static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) { int cur, left = 0, right = hyp_memblock_nr; struct memblock_region *reg; @@ -218,18 +361,28 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range) } else { range->start = reg->base; range->end = end; - return true; + return reg; } } - return false; + return NULL; } bool addr_is_memory(phys_addr_t phys) { struct kvm_mem_range range; - return find_mem_range(phys, &range); + return !!find_mem_range(phys, &range); +} + +static bool addr_is_allowed_memory(phys_addr_t phys) +{ + struct memblock_region *reg; + struct kvm_mem_range range; + + reg = find_mem_range(phys, &range); + + return reg && !(reg->flags & MEMBLOCK_NOMAP); } static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range) @@ -250,8 +403,8 @@ static bool range_is_memory(u64 start, u64 end) static inline int __host_stage2_idmap(u64 start, u64 end, enum kvm_pgtable_prot prot) { - return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start, - prot, &host_s2_pool); + return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start, + prot, &host_s2_pool, 0); } /* @@ -263,7 +416,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end, #define host_stage2_try(fn, ...) \ ({ \ int __ret; \ - hyp_assert_lock_held(&host_kvm.lock); \ + hyp_assert_lock_held(&host_mmu.lock); \ __ret = fn(__VA_ARGS__); \ if (__ret == -ENOMEM) { \ __ret = host_stage2_unmap_dev_all(); \ @@ -286,8 +439,8 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range) u32 level; int ret; - hyp_assert_lock_held(&host_kvm.lock); - ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level); + hyp_assert_lock_held(&host_mmu.lock); + ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level); if (ret) return ret; @@ -319,7 +472,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size, int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id) { - return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt, + return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt, addr, size, &host_s2_pool, owner_id); } @@ -348,7 +501,7 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr static int host_stage2_idmap(u64 addr) { struct kvm_mem_range range; - bool is_memory = find_mem_range(addr, &range); + bool is_memory = !!find_mem_range(addr, &range); enum kvm_pgtable_prot prot; int ret; @@ -380,12 +533,6 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) BUG_ON(ret && ret != -EAGAIN); } -/* This corresponds to locking order */ -enum pkvm_component_id { - PKVM_ID_HOST, - PKVM_ID_HYP, -}; - struct pkvm_mem_transition { u64 nr_pages; @@ -399,6 +546,9 @@ struct pkvm_mem_transition { /* Address in the completer's address space */ u64 completer_addr; } host; + struct { + u64 completer_addr; + } hyp; }; } initiator; @@ -412,23 +562,24 @@ struct pkvm_mem_share { const enum kvm_pgtable_prot completer_prot; }; +struct pkvm_mem_donation { + const struct pkvm_mem_transition tx; +}; + struct check_walk_data { enum pkvm_page_state desired; enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); }; -static int __check_page_state_visitor(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct check_walk_data *d = arg; - kvm_pte_t pte = *ptep; + struct check_walk_data *d = ctx->arg; - if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) + if (kvm_pte_valid(ctx->old) && !addr_is_allowed_memory(kvm_pte_to_phys(ctx->old))) return -EINVAL; - return d->get_page_state(pte) == d->desired ? 0 : -EPERM; + return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM; } static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, @@ -459,8 +610,8 @@ static int __host_check_page_state_range(u64 addr, u64 size, .get_page_state = host_get_page_state, }; - hyp_assert_lock_held(&host_kvm.lock); - return check_page_state_range(&host_kvm.pgt, addr, size, &d); + hyp_assert_lock_held(&host_mmu.lock); + return check_page_state_range(&host_mmu.pgt, addr, size, &d); } static int __host_set_page_state_range(u64 addr, u64 size, @@ -511,6 +662,46 @@ static int host_initiate_unshare(u64 *completer_addr, return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); } +static int host_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u8 owner_id = tx->completer.id; + u64 size = tx->nr_pages * PAGE_SIZE; + + *completer_addr = tx->initiator.host.completer_addr; + return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id); +} + +static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +{ + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || + tx->initiator.id != PKVM_ID_HYP); +} + +static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx, + enum pkvm_page_state state) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__host_ack_skip_pgtable_check(tx)) + return 0; + + return __host_check_page_state_range(addr, size, state); +} + +static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + return __host_ack_transition(addr, tx, PKVM_NOPAGE); +} + +static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u8 host_id = tx->completer.id; + + return host_stage2_set_owner_locked(addr, size, host_id); +} + static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) { if (!kvm_pte_valid(pte)) @@ -531,6 +722,27 @@ static int __hyp_check_page_state_range(u64 addr, u64 size, return check_page_state_range(&pkvm_pgtable, addr, size, &d); } +static int hyp_request_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.hyp.completer_addr; + return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int hyp_initiate_donation(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + int ret; + + *completer_addr = tx->initiator.hyp.completer_addr; + ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size); + return (ret != size) ? -EFAULT : 0; +} + static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) { return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || @@ -555,6 +767,9 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) { u64 size = tx->nr_pages * PAGE_SIZE; + if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr)) + return -EBUSY; + if (__hyp_ack_skip_pgtable_check(tx)) return 0; @@ -562,6 +777,16 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) PKVM_PAGE_SHARED_BORROWED); } +static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); +} + static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx, enum kvm_pgtable_prot perms) { @@ -580,6 +805,15 @@ static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) return (ret != size) ? -EFAULT : 0; } +static int hyp_complete_donation(u64 addr, + const struct pkvm_mem_transition *tx) +{ + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED); + + return pkvm_create_mappings_locked(start, end, prot); +} + static int check_share(struct pkvm_mem_share *share) { const struct pkvm_mem_transition *tx = &share->tx; @@ -732,6 +966,94 @@ static int do_unshare(struct pkvm_mem_share *share) return WARN_ON(__do_unshare(share)); } +static int check_donation(struct pkvm_mem_donation *donation) +{ + const struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_owned_transition(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_request_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_ack_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_ack_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +static int __do_donate(struct pkvm_mem_donation *donation) +{ + const struct pkvm_mem_transition *tx = &donation->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_donation(&completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_initiate_donation(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HOST: + ret = host_complete_donation(completer_addr, tx); + break; + case PKVM_ID_HYP: + ret = hyp_complete_donation(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_donate(): + * + * The page owner transfers ownership to another component, losing access + * as a consequence. + * + * Initiator: OWNED => NOPAGE + * Completer: NOPAGE => OWNED + */ +static int do_donate(struct pkvm_mem_donation *donation) +{ + int ret; + + ret = check_donation(donation); + if (ret) + return ret; + + return WARN_ON(__do_donate(donation)); +} + int __pkvm_host_share_hyp(u64 pfn) { int ret; @@ -797,3 +1119,112 @@ int __pkvm_host_unshare_hyp(u64 pfn) return ret; } + +int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_donation donation = { + .tx = { + .nr_pages = nr_pages, + .initiator = { + .id = PKVM_ID_HYP, + .addr = hyp_addr, + .hyp = { + .completer_addr = host_addr, + }, + }, + .completer = { + .id = PKVM_ID_HOST, + }, + }, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_donate(&donation); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int hyp_pin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + u64 size = end - start; + int ret; + + host_lock_component(); + hyp_lock_component(); + + ret = __host_check_page_state_range(__hyp_pa(start), size, + PKVM_PAGE_SHARED_OWNED); + if (ret) + goto unlock; + + ret = __hyp_check_page_state_range(start, size, + PKVM_PAGE_SHARED_BORROWED); + if (ret) + goto unlock; + + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_inc(hyp_virt_to_page(cur)); + +unlock: + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +void hyp_unpin_shared_mem(void *from, void *to) +{ + u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE); + u64 end = PAGE_ALIGN((u64)to); + + host_lock_component(); + hyp_lock_component(); + + for (cur = start; cur < end; cur += PAGE_SIZE) + hyp_page_ref_dec(hyp_virt_to_page(cur)); + + hyp_unlock_component(); + host_unlock_component(); +} diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c index 96193cb31a399da80598f9517a7c3e0259676b58..318298eb3d6b20eeda1854b10adf06ce676ee790 100644 --- a/arch/arm64/kvm/hyp/nvhe/mm.c +++ b/arch/arm64/kvm/hyp/nvhe/mm.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -25,6 +26,12 @@ unsigned int hyp_memblock_nr; static u64 __io_map_base; +struct hyp_fixmap_slot { + u64 addr; + kvm_pte_t *ptep; +}; +static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots); + static int __pkvm_create_mappings(unsigned long start, unsigned long size, unsigned long phys, enum kvm_pgtable_prot prot) { @@ -129,13 +136,36 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot) return ret; } -int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back) +int hyp_back_vmemmap(phys_addr_t back) { - unsigned long start, end; + unsigned long i, start, size, end = 0; + int ret; - hyp_vmemmap_range(phys, size, &start, &end); + for (i = 0; i < hyp_memblock_nr; i++) { + start = hyp_memory[i].base; + start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE); + /* + * The begining of the hyp_vmemmap region for the current + * memblock may already be backed by the page backing the end + * the previous region, so avoid mapping it twice. + */ + start = max(start, end); + + end = hyp_memory[i].base + hyp_memory[i].size; + end = PAGE_ALIGN((u64)hyp_phys_to_page(end)); + if (start >= end) + continue; + + size = end - start; + ret = __pkvm_create_mappings(start, size, back, PAGE_HYP); + if (ret) + return ret; + + memset(hyp_phys_to_virt(back), 0, size); + back += size; + } - return __pkvm_create_mappings(start, end - start, back, PAGE_HYP); + return 0; } static void *__hyp_bp_vect_base; @@ -189,6 +219,102 @@ int hyp_map_vectors(void) return 0; } +void *hyp_fixmap_map(phys_addr_t phys) +{ + struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots); + kvm_pte_t pte, *ptep = slot->ptep; + + pte = *ptep; + pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID); + pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID; + WRITE_ONCE(*ptep, pte); + dsb(ishst); + + return (void *)slot->addr; +} + +static void fixmap_clear_slot(struct hyp_fixmap_slot *slot) +{ + kvm_pte_t *ptep = slot->ptep; + u64 addr = slot->addr; + + WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID); + + /* + * Irritatingly, the architecture requires that we use inner-shareable + * broadcast TLB invalidation here in case another CPU speculates + * through our fixmap and decides to create an "amalagamation of the + * values held in the TLB" due to the apparent lack of a + * break-before-make sequence. + * + * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03 + */ + dsb(ishst); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1)); + dsb(ish); + isb(); +} + +void hyp_fixmap_unmap(void) +{ + fixmap_clear_slot(this_cpu_ptr(&fixmap_slots)); +} + +static int __create_fixmap_slot_cb(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) +{ + struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)ctx->arg); + + if (!kvm_pte_valid(ctx->old) || ctx->level != KVM_PGTABLE_MAX_LEVELS - 1) + return -EINVAL; + + slot->addr = ctx->addr; + slot->ptep = ctx->ptep; + + /* + * Clear the PTE, but keep the page-table page refcount elevated to + * prevent it from ever being freed. This lets us manipulate the PTEs + * by hand safely without ever needing to allocate memory. + */ + fixmap_clear_slot(slot); + + return 0; +} + +static int create_fixmap_slot(u64 addr, u64 cpu) +{ + struct kvm_pgtable_walker walker = { + .cb = __create_fixmap_slot_cb, + .flags = KVM_PGTABLE_WALK_LEAF, + .arg = (void *)cpu, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker); +} + +int hyp_create_pcpu_fixmap(void) +{ + unsigned long addr, i; + int ret; + + for (i = 0; i < hyp_nr_cpus; i++) { + ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr); + if (ret) + return ret; + + ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE, + __hyp_pa(__hyp_bss_start), PAGE_HYP); + if (ret) + return ret; + + ret = create_fixmap_slot(addr, i); + if (ret) + return ret; + } + + return 0; +} + int hyp_create_idmap(u32 hyp_va_bits) { unsigned long start, end; @@ -213,3 +339,36 @@ int hyp_create_idmap(u32 hyp_va_bits) return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC); } + +static void *admit_host_page(void *arg) +{ + struct kvm_hyp_memcache *host_mc = arg; + + if (!host_mc->nr_pages) + return NULL; + + /* + * The host still owns the pages in its memcache, so we need to go + * through a full host-to-hyp donation cycle to change it. Fortunately, + * __pkvm_host_donate_hyp() takes care of races for us, so if it + * succeeds we're good to go. + */ + if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1)) + return NULL; + + return pop_hyp_memcache(host_mc, hyp_phys_to_virt); +} + +/* Refill our local memcache by poping pages from the one provided by the host. */ +int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages, + struct kvm_hyp_memcache *host_mc) +{ + struct kvm_hyp_memcache tmp = *host_mc; + int ret; + + ret = __topup_hyp_memcache(mc, min_pages, admit_host_page, + hyp_virt_to_phys, &tmp); + *host_mc = tmp; + + return ret; +} diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c index d40f0b30b534701d2b35500469e0f79533c9a8c4..803ba3222e75fc7fd226004466b198042b0cb13c 100644 --- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c @@ -93,11 +93,16 @@ static inline struct hyp_page *node_to_page(struct list_head *node) static void __hyp_attach_page(struct hyp_pool *pool, struct hyp_page *p) { + phys_addr_t phys = hyp_page_to_phys(p); unsigned short order = p->order; struct hyp_page *buddy; memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order); + /* Skip coalescing for 'external' pages being freed into the pool. */ + if (phys < pool->range_start || phys >= pool->range_end) + goto insert; + /* * Only the first struct hyp_page of a high-order page (otherwise known * as the 'head') should have p->order set. The non-head pages should @@ -116,6 +121,7 @@ static void __hyp_attach_page(struct hyp_pool *pool, p = min(p, buddy); } +insert: /* Mark the new head, and insert it */ p->order = order; page_add_to_list(p, &pool->free_area[order]); @@ -144,25 +150,6 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool, return p; } -static inline void hyp_page_ref_inc(struct hyp_page *p) -{ - BUG_ON(p->refcount == USHRT_MAX); - p->refcount++; -} - -static inline int hyp_page_ref_dec_and_test(struct hyp_page *p) -{ - BUG_ON(!p->refcount); - p->refcount--; - return (p->refcount == 0); -} - -static inline void hyp_set_page_refcounted(struct hyp_page *p) -{ - BUG_ON(p->refcount); - p->refcount = 1; -} - static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p) { if (hyp_page_ref_dec_and_test(p)) @@ -249,10 +236,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages, /* Init the vmemmap portion */ p = hyp_phys_to_page(phys); - for (i = 0; i < nr_pages; i++) { - p[i].order = 0; + for (i = 0; i < nr_pages; i++) hyp_set_page_refcounted(&p[i]); - } /* Attach the unused pages to the buddy tree */ for (i = reserved_pages; i < nr_pages; i++) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 85d3b7ae720fb0ae78e79709e9c555ab01531e0d..a06ece14a6d8ef63fefb15684d6fcacf3876b928 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -7,8 +7,17 @@ #include #include #include +#include +#include +#include #include +/* Used by icache_is_vpipt(). */ +unsigned long __icache_flags; + +/* Used by kvm_get_vttbr(). */ +unsigned int kvm_arm_vmid_bits; + /* * Set trap register values based on features in ID_AA64PFR0. */ @@ -183,3 +192,430 @@ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) pvm_init_traps_aa64mmfr0(vcpu); pvm_init_traps_aa64mmfr1(vcpu); } + +/* + * Start the VM table handle at the offset defined instead of at 0. + * Mainly for sanity checking and debugging. + */ +#define HANDLE_OFFSET 0x1000 + +static unsigned int vm_handle_to_idx(pkvm_handle_t handle) +{ + return handle - HANDLE_OFFSET; +} + +static pkvm_handle_t idx_to_vm_handle(unsigned int idx) +{ + return idx + HANDLE_OFFSET; +} + +/* + * Spinlock for protecting state related to the VM table. Protects writes + * to 'vm_table' and 'nr_table_entries' as well as reads and writes to + * 'last_hyp_vcpu_lookup'. + */ +static DEFINE_HYP_SPINLOCK(vm_table_lock); + +/* + * The table of VM entries for protected VMs in hyp. + * Allocated at hyp initialization and setup. + */ +static struct pkvm_hyp_vm **vm_table; + +void pkvm_hyp_vm_table_init(void *tbl) +{ + WARN_ON(vm_table); + vm_table = tbl; +} + +/* + * Return the hyp vm structure corresponding to the handle. + */ +static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle) +{ + unsigned int idx = vm_handle_to_idx(handle); + + if (unlikely(idx >= KVM_MAX_PVMS)) + return NULL; + + return vm_table[idx]; +} + +struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle, + unsigned int vcpu_idx) +{ + struct pkvm_hyp_vcpu *hyp_vcpu = NULL; + struct pkvm_hyp_vm *hyp_vm; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx) + goto unlock; + + hyp_vcpu = hyp_vm->vcpus[vcpu_idx]; + hyp_page_ref_inc(hyp_virt_to_page(hyp_vm)); +unlock: + hyp_spin_unlock(&vm_table_lock); + return hyp_vcpu; +} + +void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu) +{ + struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu); + + hyp_spin_lock(&vm_table_lock); + hyp_page_ref_dec(hyp_virt_to_page(hyp_vm)); + hyp_spin_unlock(&vm_table_lock); +} + +static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu) +{ + if (host_vcpu) + hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1); +} + +static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[], + unsigned int nr_vcpus) +{ + int i; + + for (i = 0; i < nr_vcpus; i++) + unpin_host_vcpu(hyp_vcpus[i]->host_vcpu); +} + +static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm, + unsigned int nr_vcpus) +{ + hyp_vm->host_kvm = host_kvm; + hyp_vm->kvm.created_vcpus = nr_vcpus; + hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr; +} + +static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu, + struct pkvm_hyp_vm *hyp_vm, + struct kvm_vcpu *host_vcpu, + unsigned int vcpu_idx) +{ + int ret = 0; + + if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1)) + return -EBUSY; + + if (host_vcpu->vcpu_idx != vcpu_idx) { + ret = -EINVAL; + goto done; + } + + hyp_vcpu->host_vcpu = host_vcpu; + + hyp_vcpu->vcpu.kvm = &hyp_vm->kvm; + hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id); + hyp_vcpu->vcpu.vcpu_idx = vcpu_idx; + + hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu; + hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags); +done: + if (ret) + unpin_host_vcpu(host_vcpu); + return ret; +} + +static int find_free_vm_table_entry(struct kvm *host_kvm) +{ + int i; + + for (i = 0; i < KVM_MAX_PVMS; ++i) { + if (!vm_table[i]) + return i; + } + + return -ENOMEM; +} + +/* + * Allocate a VM table entry and insert a pointer to the new vm. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm, + struct pkvm_hyp_vm *hyp_vm) +{ + struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu; + int idx; + + hyp_assert_lock_held(&vm_table_lock); + + /* + * Initializing protected state might have failed, yet a malicious + * host could trigger this function. Thus, ensure that 'vm_table' + * exists. + */ + if (unlikely(!vm_table)) + return -EINVAL; + + idx = find_free_vm_table_entry(host_kvm); + if (idx < 0) + return idx; + + hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx); + + /* VMID 0 is reserved for the host */ + atomic64_set(&mmu->vmid.id, idx + 1); + + mmu->arch = &hyp_vm->kvm.arch; + mmu->pgt = &hyp_vm->pgt; + + vm_table[idx] = hyp_vm; + return hyp_vm->kvm.arch.pkvm.handle; +} + +/* + * Deallocate and remove the VM table entry corresponding to the handle. + */ +static void remove_vm_table_entry(pkvm_handle_t handle) +{ + hyp_assert_lock_held(&vm_table_lock); + vm_table[vm_handle_to_idx(handle)] = NULL; +} + +static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus) +{ + return size_add(sizeof(struct pkvm_hyp_vm), + size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus)); +} + +static void *map_donated_memory_noclear(unsigned long host_va, size_t size) +{ + void *va = (void *)kern_hyp_va(host_va); + + if (!PAGE_ALIGNED(va)) + return NULL; + + if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)) + return NULL; + + return va; +} + +static void *map_donated_memory(unsigned long host_va, size_t size) +{ + void *va = map_donated_memory_noclear(host_va, size); + + if (va) + memset(va, 0, size); + + return va; +} + +static void __unmap_donated_memory(void *va, size_t size) +{ + WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va), + PAGE_ALIGN(size) >> PAGE_SHIFT)); +} + +static void unmap_donated_memory(void *va, size_t size) +{ + if (!va) + return; + + memset(va, 0, size); + __unmap_donated_memory(va, size); +} + +static void unmap_donated_memory_noclear(void *va, size_t size) +{ + if (!va) + return; + + __unmap_donated_memory(va, size); +} + +/* + * Initialize the hypervisor copy of the protected VM state using the + * memory donated by the host. + * + * Unmaps the donated memory from the host at stage 2. + * + * host_kvm: A pointer to the host's struct kvm. + * vm_hva: The host va of the area being donated for the VM state. + * Must be page aligned. + * pgd_hva: The host va of the area being donated for the stage-2 PGD for + * the VM. Must be page aligned. Its size is implied by the VM's + * VTCR. + * + * Return a unique handle to the protected VM on success, + * negative error code on failure. + */ +int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva, + unsigned long pgd_hva) +{ + struct pkvm_hyp_vm *hyp_vm = NULL; + size_t vm_size, pgd_size; + unsigned int nr_vcpus; + void *pgd = NULL; + int ret; + + ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1); + if (ret) + return ret; + + nr_vcpus = READ_ONCE(host_kvm->created_vcpus); + if (nr_vcpus < 1) { + ret = -EINVAL; + goto err_unpin_kvm; + } + + vm_size = pkvm_get_hyp_vm_size(nr_vcpus); + pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr); + + ret = -ENOMEM; + + hyp_vm = map_donated_memory(vm_hva, vm_size); + if (!hyp_vm) + goto err_remove_mappings; + + pgd = map_donated_memory_noclear(pgd_hva, pgd_size); + if (!pgd) + goto err_remove_mappings; + + init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus); + + hyp_spin_lock(&vm_table_lock); + ret = insert_vm_table_entry(host_kvm, hyp_vm); + if (ret < 0) + goto err_unlock; + + ret = kvm_guest_prepare_stage2(hyp_vm, pgd); + if (ret) + goto err_remove_vm_table_entry; + hyp_spin_unlock(&vm_table_lock); + + return hyp_vm->kvm.arch.pkvm.handle; + +err_remove_vm_table_entry: + remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle); +err_unlock: + hyp_spin_unlock(&vm_table_lock); +err_remove_mappings: + unmap_donated_memory(hyp_vm, vm_size); + unmap_donated_memory(pgd, pgd_size); +err_unpin_kvm: + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return ret; +} + +/* + * Initialize the hypervisor copy of the protected vCPU state using the + * memory donated by the host. + * + * handle: The handle for the protected vm. + * host_vcpu: A pointer to the corresponding host vcpu. + * vcpu_hva: The host va of the area being donated for the vcpu state. + * Must be page aligned. The size of the area must be equal to + * the page-aligned size of 'struct pkvm_hyp_vcpu'. + * Return 0 on success, negative error code on failure. + */ +int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu, + unsigned long vcpu_hva) +{ + struct pkvm_hyp_vcpu *hyp_vcpu; + struct pkvm_hyp_vm *hyp_vm; + unsigned int idx; + int ret; + + hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu)); + if (!hyp_vcpu) + return -ENOMEM; + + hyp_spin_lock(&vm_table_lock); + + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + ret = -ENOENT; + goto unlock; + } + + idx = hyp_vm->nr_vcpus; + if (idx >= hyp_vm->kvm.created_vcpus) { + ret = -EINVAL; + goto unlock; + } + + ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx); + if (ret) + goto unlock; + + hyp_vm->vcpus[idx] = hyp_vcpu; + hyp_vm->nr_vcpus++; +unlock: + hyp_spin_unlock(&vm_table_lock); + + if (ret) + unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu)); + + return ret; +} + +static void +teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size) +{ + size = PAGE_ALIGN(size); + memset(addr, 0, size); + + for (void *start = addr; start < addr + size; start += PAGE_SIZE) + push_hyp_memcache(mc, start, hyp_virt_to_phys); + + unmap_donated_memory_noclear(addr, size); +} + +int __pkvm_teardown_vm(pkvm_handle_t handle) +{ + struct kvm_hyp_memcache *mc; + struct pkvm_hyp_vm *hyp_vm; + struct kvm *host_kvm; + unsigned int idx; + size_t vm_size; + int err; + + hyp_spin_lock(&vm_table_lock); + hyp_vm = get_vm_by_handle(handle); + if (!hyp_vm) { + err = -ENOENT; + goto err_unlock; + } + + if (WARN_ON(hyp_page_count(hyp_vm))) { + err = -EBUSY; + goto err_unlock; + } + + host_kvm = hyp_vm->host_kvm; + + /* Ensure the VMID is clean before it can be reallocated */ + __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu); + remove_vm_table_entry(handle); + hyp_spin_unlock(&vm_table_lock); + + /* Reclaim guest pages (including page-table pages) */ + mc = &host_kvm->arch.pkvm.teardown_mc; + reclaim_guest_pages(hyp_vm, mc); + unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus); + + /* Push the metadata pages to the teardown memcache */ + for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) { + struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx]; + + teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu)); + } + + vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus); + teardown_donated_memory(mc, hyp_vm, vm_size); + hyp_unpin_shared_mem(host_kvm, host_kvm + 1); + return 0; + +err_unlock: + hyp_spin_unlock(&vm_table_lock); + return err; +} diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index e8d4ea2fcfa09039e98b966aefeecabb7d586e2a..110f04627785c8dd0dccebbd2cd674478b255e7b 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -16,6 +16,7 @@ #include #include #include +#include #include unsigned long hyp_nr_cpus; @@ -24,6 +25,7 @@ unsigned long hyp_nr_cpus; (unsigned long)__per_cpu_start) static void *vmemmap_base; +static void *vm_table_base; static void *hyp_pgt_base; static void *host_s2_pgt_base; static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops; @@ -31,16 +33,20 @@ static struct hyp_pool hpool; static int divide_memory_pool(void *virt, unsigned long size) { - unsigned long vstart, vend, nr_pages; + unsigned long nr_pages; hyp_early_alloc_init(virt, size); - hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend); - nr_pages = (vend - vstart) >> PAGE_SHIFT; + nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page)); vmemmap_base = hyp_early_alloc_contig(nr_pages); if (!vmemmap_base) return -ENOMEM; + nr_pages = hyp_vm_table_pages(); + vm_table_base = hyp_early_alloc_contig(nr_pages); + if (!vm_table_base) + return -ENOMEM; + nr_pages = hyp_s1_pgtable_pages(); hyp_pgt_base = hyp_early_alloc_contig(nr_pages); if (!hyp_pgt_base) @@ -78,7 +84,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, if (ret) return ret; - ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base)); + ret = hyp_back_vmemmap(hyp_virt_to_phys(vmemmap_base)); if (ret) return ret; @@ -138,20 +144,17 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size, } /* - * Map the host's .bss and .rodata sections RO in the hypervisor, but - * transfer the ownership from the host to the hypervisor itself to - * make sure it can't be donated or shared with another entity. + * Map the host sections RO in the hypervisor, but transfer the + * ownership from the host to the hypervisor itself to make sure they + * can't be donated or shared with another entity. * * The ownership transition requires matching changes in the host * stage-2. This will be done later (see finalize_host_mappings()) once * the hyp_vmemmap is addressable. */ prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED); - ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot); - if (ret) - return ret; - - ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot); + ret = pkvm_create_mappings(&kvm_vgic_global_state, + &kvm_vgic_global_state + 1, prot); if (ret) return ret; @@ -186,33 +189,20 @@ static void hpool_put_page(void *addr) hyp_put_page(&hpool, addr); } -static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; - kvm_pte_t pte = *ptep; phys_addr_t phys; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; - /* - * Fix-up the refcount for the page-table pages as the early allocator - * was unable to access the hyp_vmemmap and so the buddy allocator has - * initialised the refcount to '1'. - */ - mm_ops->get_page(ptep); - if (flag != KVM_PGTABLE_WALK_LEAF) - return 0; - - if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) + if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; - phys = kvm_pte_to_phys(pte); + phys = kvm_pte_to_phys(ctx->old); if (!addr_is_memory(phys)) return -EINVAL; @@ -220,10 +210,10 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, * Adjust the host stage-2 mappings to match the ownership attributes * configured in the hypervisor stage-1. */ - state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte)); + state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old)); switch (state) { case PKVM_PAGE_OWNED: - return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id); + return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP); case PKVM_PAGE_SHARED_OWNED: prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED); break; @@ -237,12 +227,25 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, return host_stage2_idmap_locked(phys, PAGE_SIZE, prot); } -static int finalize_host_mappings(void) +static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) +{ + /* + * Fix-up the refcount for the page-table pages as the early allocator + * was unable to access the hyp_vmemmap and so the buddy allocator has + * initialised the refcount to '1'. + */ + if (kvm_pte_valid(ctx->old)) + ctx->mm_ops->get_page(ctx->ptep); + + return 0; +} + +static int fix_host_ownership(void) { struct kvm_pgtable_walker walker = { - .cb = finalize_host_mappings_walker, - .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pkvm_pgtable.mm_ops, + .cb = fix_host_ownership_walker, + .flags = KVM_PGTABLE_WALK_LEAF, }; int i, ret; @@ -258,6 +261,18 @@ static int finalize_host_mappings(void) return 0; } +static int fix_hyp_pgtable_refcnt(void) +{ + struct kvm_pgtable_walker walker = { + .cb = fix_hyp_pgtable_refcnt_walker, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .arg = pkvm_pgtable.mm_ops, + }; + + return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits), + &walker); +} + void __noreturn __pkvm_init_finalise(void) { struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data); @@ -287,10 +302,19 @@ void __noreturn __pkvm_init_finalise(void) }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; - ret = finalize_host_mappings(); + ret = fix_host_ownership(); + if (ret) + goto out; + + ret = fix_hyp_pgtable_refcnt(); + if (ret) + goto out; + + ret = hyp_create_pcpu_fixmap(); if (ret) goto out; + pkvm_hyp_vm_table_init(vm_table_base); out: /* * We tail-called to here from handle___pkvm_init() and will not return, diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cdf8e76b0be141380c95a39e76398a222be75c93..b11cf2c618a6c9a7a52762d5eae98667fa821086 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -49,35 +49,38 @@ #define KVM_INVALID_PTE_OWNER_MASK GENMASK(9, 2) #define KVM_MAX_OWNER_ID 1 +/* + * Used to indicate a pte for which a 'break-before-make' sequence is in + * progress. + */ +#define KVM_INVALID_PTE_LOCKED BIT(10) + struct kvm_pgtable_walk_data { - struct kvm_pgtable *pgt; struct kvm_pgtable_walker *walker; u64 addr; u64 end; }; -#define KVM_PHYS_INVALID (-1ULL) - static bool kvm_phys_is_valid(u64 phys) { return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX)); } -static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) +static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys) { - u64 granule = kvm_granule_size(level); + u64 granule = kvm_granule_size(ctx->level); - if (!kvm_level_supports_block_mapping(level)) + if (!kvm_level_supports_block_mapping(ctx->level)) return false; - if (granule > (end - addr)) + if (granule > (ctx->end - ctx->addr)) return false; if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule)) return false; - return IS_ALIGNED(addr, granule); + return IS_ALIGNED(ctx->addr, granule); } static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) @@ -88,7 +91,7 @@ static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) return (data->addr >> shift) & mask; } -static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) +static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) { u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */ u64 mask = BIT(pgt->ia_bits) - 1; @@ -96,11 +99,6 @@ static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr) return (addr & mask) >> shift; } -static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data) -{ - return __kvm_pgd_page_idx(data->pgt, data->addr); -} - static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) { struct kvm_pgtable pgt = { @@ -108,7 +106,7 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level) .start_level = start_level, }; - return __kvm_pgd_page_idx(&pgt, -1ULL) + 1; + return kvm_pgd_page_idx(&pgt, -1ULL) + 1; } static bool kvm_pte_table(kvm_pte_t pte, u32 level) @@ -122,16 +120,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level) return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE; } -static kvm_pte_t kvm_phys_to_pte(u64 pa) -{ - kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; - - if (PAGE_SHIFT == 16) - pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); - - return pte; -} - static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops) { return mm_ops->phys_to_virt(kvm_pte_to_phys(pte)); @@ -142,16 +130,13 @@ static void kvm_clear_pte(kvm_pte_t *ptep) WRITE_ONCE(*ptep, 0); } -static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp, - struct kvm_pgtable_mm_ops *mm_ops) +static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops) { - kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); + kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp)); pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE); pte |= KVM_PTE_VALID; - - WARN_ON(kvm_pte_valid(old)); - smp_store_release(ptep, pte); + return pte; } static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level) @@ -172,36 +157,47 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id) return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id); } -static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr, - u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag) +static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, + const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { struct kvm_pgtable_walker *walker = data->walker; - return walker->cb(addr, data->end, level, ptep, flag, walker->arg); + + /* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */ + WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held()); + return walker->cb(ctx, visit); } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - kvm_pte_t *pgtable, u32 level); + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level); static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, - kvm_pte_t *ptep, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, + kvm_pteref_t pteref, u32 level) { - int ret = 0; - u64 addr = data->addr; - kvm_pte_t *childp, pte = *ptep; - bool table = kvm_pte_table(pte, level); enum kvm_pgtable_walk_flags flags = data->walker->flags; + kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref); + struct kvm_pgtable_visit_ctx ctx = { + .ptep = ptep, + .old = READ_ONCE(*ptep), + .arg = data->walker->arg, + .mm_ops = mm_ops, + .addr = data->addr, + .end = data->end, + .level = level, + .flags = flags, + }; + int ret = 0; + kvm_pteref_t childp; + bool table = kvm_pte_table(ctx.old, level); - if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_TABLE_PRE); - } + if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE)) + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE); - if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_LEAF); - pte = *ptep; - table = kvm_pte_table(pte, level); + if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) { + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF); + ctx.old = READ_ONCE(*ptep); + table = kvm_pte_table(ctx.old, level); } if (ret) @@ -213,22 +209,20 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, goto out; } - childp = kvm_pte_follow(pte, data->pgt->mm_ops); - ret = __kvm_pgtable_walk(data, childp, level + 1); + childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops); + ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1); if (ret) goto out; - if (flags & KVM_PGTABLE_WALK_TABLE_POST) { - ret = kvm_pgtable_visitor_cb(data, addr, level, ptep, - KVM_PGTABLE_WALK_TABLE_POST); - } + if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST) + ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST); out: return ret; } static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, - kvm_pte_t *pgtable, u32 level) + struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level) { u32 idx; int ret = 0; @@ -237,12 +231,12 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, return -EINVAL; for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { - kvm_pte_t *ptep = &pgtable[idx]; + kvm_pteref_t pteref = &pgtable[idx]; if (data->addr >= data->end) break; - ret = __kvm_pgtable_visit(data, ptep, level); + ret = __kvm_pgtable_visit(data, mm_ops, pteref, level); if (ret) break; } @@ -250,11 +244,10 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, return ret; } -static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) +static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data) { u32 idx; int ret = 0; - struct kvm_pgtable *pgt = data->pgt; u64 limit = BIT(pgt->ia_bits); if (data->addr > limit || data->end > limit) @@ -263,10 +256,10 @@ static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) if (!pgt->pgd) return -EINVAL; - for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) { - kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; + for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) { + kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE]; - ret = __kvm_pgtable_walk(data, ptep, pgt->start_level); + ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level); if (ret) break; } @@ -278,13 +271,20 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size, struct kvm_pgtable_walker *walker) { struct kvm_pgtable_walk_data walk_data = { - .pgt = pgt, .addr = ALIGN_DOWN(addr, PAGE_SIZE), .end = PAGE_ALIGN(walk_data.addr + size), .walker = walker, }; + int r; - return _kvm_pgtable_walk(&walk_data); + r = kvm_pgtable_walk_begin(walker); + if (r) + return r; + + r = _kvm_pgtable_walk(pgt, &walk_data); + kvm_pgtable_walk_end(walker); + + return r; } struct leaf_walk_data { @@ -292,13 +292,13 @@ struct leaf_walk_data { u32 level; }; -static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct leaf_walk_data *data = arg; + struct leaf_walk_data *data = ctx->arg; - data->pte = *ptep; - data->level = level; + data->pte = ctx->old; + data->level = ctx->level; return 0; } @@ -329,7 +329,6 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr, struct hyp_map_data { u64 phys; kvm_pte_t attr; - struct kvm_pgtable_mm_ops *mm_ops; }; static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep) @@ -383,47 +382,49 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) return prot; } -static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, struct hyp_map_data *data) +static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, + struct hyp_map_data *data) { - kvm_pte_t new, old = *ptep; - u64 granule = kvm_granule_size(level), phys = data->phys; + kvm_pte_t new; + u64 granule = kvm_granule_size(ctx->level), phys = data->phys; - if (!kvm_block_mapping_supported(addr, end, phys, level)) + if (!kvm_block_mapping_supported(ctx, phys)) return false; data->phys += granule; - new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (old == new) + new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); + if (ctx->old == new) return true; - if (!kvm_pte_valid(old)) - data->mm_ops->get_page(ptep); - else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) + if (!kvm_pte_valid(ctx->old)) + ctx->mm_ops->get_page(ctx->ptep); + else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) return false; - smp_store_release(ptep, new); + smp_store_release(ctx->ptep, new); return true; } -static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - kvm_pte_t *childp; - struct hyp_map_data *data = arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + kvm_pte_t *childp, new; + struct hyp_map_data *data = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg)) + if (hyp_map_walker_try_leaf(ctx, data)) return 0; - if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL); if (!childp) return -ENOMEM; - kvm_set_table_pte(ptep, childp, mm_ops); - mm_ops->get_page(ptep); + new = kvm_init_table_pte(childp, mm_ops); + mm_ops->get_page(ctx->ptep); + smp_store_release(ctx->ptep, new); + return 0; } @@ -433,7 +434,6 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, int ret; struct hyp_map_data map_data = { .phys = ALIGN_DOWN(phys, PAGE_SIZE), - .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = hyp_map_walker, @@ -451,44 +451,39 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, return ret; } -struct hyp_unmap_data { - u64 unmapped; - struct kvm_pgtable_mm_ops *mm_ops; -}; - -static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ptep, *childp = NULL; - u64 granule = kvm_granule_size(level); - struct hyp_unmap_data *data = arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + kvm_pte_t *childp = NULL; + u64 granule = kvm_granule_size(ctx->level); + u64 *unmapped = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return -EINVAL; - if (kvm_pte_table(pte, level)) { - childp = kvm_pte_follow(pte, mm_ops); + if (kvm_pte_table(ctx->old, ctx->level)) { + childp = kvm_pte_follow(ctx->old, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; - kvm_clear_pte(ptep); + kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); } else { - if (end - addr < granule) + if (ctx->end - ctx->addr < granule) return -EINVAL; - kvm_clear_pte(ptep); + kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); - data->unmapped += granule; + __tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); + *unmapped += granule; } dsb(ish); isb(); - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); if (childp) mm_ops->put_page(childp); @@ -498,12 +493,10 @@ static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) { - struct hyp_unmap_data unmap_data = { - .mm_ops = pgt->mm_ops, - }; + u64 unmapped = 0; struct kvm_pgtable_walker walker = { .cb = hyp_unmap_walker, - .arg = &unmap_data, + .arg = &unmapped, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, }; @@ -511,7 +504,7 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) return 0; kvm_pgtable_walk(pgt, addr, size, &walker); - return unmap_data.unmapped; + return unmapped; } int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, @@ -519,7 +512,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, { u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits); - pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL); + pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL); if (!pgt->pgd) return -ENOMEM; @@ -532,19 +525,18 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, return 0; } -static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, level)) - mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); + if (kvm_pte_table(ctx->old, ctx->level)) + mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); return 0; } @@ -554,11 +546,10 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) struct kvm_pgtable_walker walker = { .cb = hyp_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); - pgt->mm_ops->put_page(pgt->pgd); + pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd)); pgt->pgd = NULL; } @@ -573,8 +564,6 @@ struct stage2_map_data { struct kvm_s2_mmu *mmu; void *memcache; - struct kvm_pgtable_mm_ops *mm_ops; - /* Force mappings to page granularity */ bool force_pte; }; @@ -682,19 +671,92 @@ static bool stage2_pte_is_counted(kvm_pte_t pte) return !!pte; } -static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr, - u32 level, struct kvm_pgtable_mm_ops *mm_ops) +static bool stage2_pte_is_locked(kvm_pte_t pte) +{ + return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED); +} + +static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) +{ + if (!kvm_pgtable_walk_shared(ctx)) { + WRITE_ONCE(*ctx->ptep, new); + return true; + } + + return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old; +} + +/** + * stage2_try_break_pte() - Invalidates a pte according to the + * 'break-before-make' requirements of the + * architecture. + * + * @ctx: context of the visited pte. + * @mmu: stage-2 mmu + * + * Returns: true if the pte was successfully broken. + * + * If the removed pte was valid, performs the necessary serialization and TLB + * invalidation for the old value. For counted ptes, drops the reference count + * on the containing table page. + */ +static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, + struct kvm_s2_mmu *mmu) +{ + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + + if (stage2_pte_is_locked(ctx->old)) { + /* + * Should never occur if this walker has exclusive access to the + * page tables. + */ + WARN_ON(!kvm_pgtable_walk_shared(ctx)); + return false; + } + + if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED)) + return false; + + /* + * Perform the appropriate TLB invalidation based on the evicted pte + * value (if any). + */ + if (kvm_pte_table(ctx->old, ctx->level)) + kvm_call_hyp(__kvm_tlb_flush_vmid, mmu); + else if (kvm_pte_valid(ctx->old)) + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + + if (stage2_pte_is_counted(ctx->old)) + mm_ops->put_page(ctx->ptep); + + return true; +} + +static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new) +{ + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + + WARN_ON(!stage2_pte_is_locked(*ctx->ptep)); + + if (stage2_pte_is_counted(new)) + mm_ops->get_page(ctx->ptep); + + smp_store_release(ctx->ptep, new); +} + +static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu, + struct kvm_pgtable_mm_ops *mm_ops) { /* * Clear the existing PTE, and perform break-before-make with * TLB maintenance if it was valid. */ - if (kvm_pte_valid(*ptep)) { - kvm_clear_pte(ptep); - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level); + if (kvm_pte_valid(ctx->old)) { + kvm_clear_pte(ctx->ptep); + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); } - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); } static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte) @@ -708,44 +770,42 @@ static bool stage2_pte_executable(kvm_pte_t pte) return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN); } -static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level, +static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1))) + if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1))) return false; - return kvm_block_mapping_supported(addr, end, data->phys, level); + return kvm_block_mapping_supported(ctx, data->phys); } -static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - kvm_pte_t new, old = *ptep; - u64 granule = kvm_granule_size(level), phys = data->phys; + kvm_pte_t new; + u64 granule = kvm_granule_size(ctx->level), phys = data->phys; struct kvm_pgtable *pgt = data->mmu->pgt; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (!stage2_leaf_mapping_allowed(addr, end, level, data)) + if (!stage2_leaf_mapping_allowed(ctx, data)) return -E2BIG; if (kvm_phys_is_valid(phys)) - new = kvm_init_valid_leaf_pte(phys, data->attr, level); + new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level); else new = kvm_init_invalid_leaf_owner(data->owner_id); - if (stage2_pte_is_counted(old)) { - /* - * Skip updating the PTE if we are trying to recreate the exact - * same mapping or only change the access permissions. Instead, - * the vCPU will exit one more time from guest if still needed - * and then go through the path of relaxing permissions. - */ - if (!stage2_pte_needs_update(old, new)) - return -EAGAIN; + /* + * Skip updating the PTE if we are trying to recreate the exact + * same mapping or only change the access permissions. Instead, + * the vCPU will exit one more time from guest if still needed + * and then go through the path of relaxing permissions. + */ + if (!stage2_pte_needs_update(ctx->old, new)) + return -EAGAIN; - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); - } + if (!stage2_try_break_pte(ctx, data->mmu)) + return -EAGAIN; /* Perform CMOs before installation of the guest stage-2 PTE */ if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new)) @@ -755,56 +815,43 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (mm_ops->icache_inval_pou && stage2_pte_executable(new)) mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule); - smp_store_release(ptep, new); - if (stage2_pte_is_counted(new)) - mm_ops->get_page(ptep); + stage2_make_pte(ctx, new); + if (kvm_phys_is_valid(phys)) data->phys += granule; return 0; } -static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, +static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - if (data->anchor) - return 0; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops); + int ret; - if (!stage2_leaf_mapping_allowed(addr, end, level, data)) + if (!stage2_leaf_mapping_allowed(ctx, data)) return 0; - data->childp = kvm_pte_follow(*ptep, data->mm_ops); - kvm_clear_pte(ptep); + ret = stage2_map_walker_try_leaf(ctx, data); + if (ret) + return ret; - /* - * Invalidate the whole stage-2, as we may have numerous leaf - * entries below us which would otherwise need invalidating - * individually. - */ - kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu); - data->anchor = ptep; + mm_ops->free_removed_table(childp, ctx->level); return 0; } -static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, +static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx, struct stage2_map_data *data) { - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - kvm_pte_t *childp, pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + kvm_pte_t *childp, new; int ret; - if (data->anchor) { - if (stage2_pte_is_counted(pte)) - mm_ops->put_page(ptep); - - return 0; - } - - ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data); + ret = stage2_map_walker_try_leaf(ctx, data); if (ret != -E2BIG) return ret; - if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1)) + if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; if (!data->memcache) @@ -814,99 +861,62 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, if (!childp) return -ENOMEM; + if (!stage2_try_break_pte(ctx, data->mmu)) { + mm_ops->put_page(childp); + return -EAGAIN; + } + /* * If we've run into an existing block mapping then replace it with * a table. Accesses beyond 'end' that fall within the new table * will be mapped lazily. */ - if (stage2_pte_is_counted(pte)) - stage2_put_pte(ptep, data->mmu, addr, level, mm_ops); - - kvm_set_table_pte(ptep, childp, mm_ops); - mm_ops->get_page(ptep); + new = kvm_init_table_pte(childp, mm_ops); + stage2_make_pte(ctx, new); return 0; } -static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level, - kvm_pte_t *ptep, - struct stage2_map_data *data) -{ - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; - kvm_pte_t *childp; - int ret = 0; - - if (!data->anchor) - return 0; - - if (data->anchor == ptep) { - childp = data->childp; - data->anchor = NULL; - data->childp = NULL; - ret = stage2_map_walk_leaf(addr, end, level, ptep, data); - } else { - childp = kvm_pte_follow(*ptep, mm_ops); - } - - mm_ops->put_page(childp); - mm_ops->put_page(ptep); - - return ret; -} - /* - * This is a little fiddly, as we use all three of the walk flags. The idea - * is that the TABLE_PRE callback runs for table entries on the way down, - * looking for table entries which we could conceivably replace with a - * block entry for this mapping. If it finds one, then it sets the 'anchor' - * field in 'struct stage2_map_data' to point at the table entry, before - * clearing the entry to zero and descending into the now detached table. + * The TABLE_PRE callback runs for table entries on the way down, looking + * for table entries which we could conceivably replace with a block entry + * for this mapping. If it finds one it replaces the entry and calls + * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table. * - * The behaviour of the LEAF callback then depends on whether or not the - * anchor has been set. If not, then we're not using a block mapping higher - * up the table and we perform the mapping at the existing leaves instead. - * If, on the other hand, the anchor _is_ set, then we drop references to - * all valid leaves so that the pages beneath the anchor can be freed. - * - * Finally, the TABLE_POST callback does nothing if the anchor has not - * been set, but otherwise frees the page-table pages while walking back up - * the page-table, installing the block entry when it revisits the anchor - * pointer and clearing the anchor to NULL. + * Otherwise, the LEAF callback performs the mapping at the existing leaves + * instead. */ -static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, void * const arg) +static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct stage2_map_data *data = arg; + struct stage2_map_data *data = ctx->arg; - switch (flag) { + switch (visit) { case KVM_PGTABLE_WALK_TABLE_PRE: - return stage2_map_walk_table_pre(addr, end, level, ptep, data); + return stage2_map_walk_table_pre(ctx, data); case KVM_PGTABLE_WALK_LEAF: - return stage2_map_walk_leaf(addr, end, level, ptep, data); - case KVM_PGTABLE_WALK_TABLE_POST: - return stage2_map_walk_table_post(addr, end, level, ptep, data); + return stage2_map_walk_leaf(ctx, data); + default: + return -EINVAL; } - - return -EINVAL; } int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot, - void *mc) + void *mc, enum kvm_pgtable_walk_flags flags) { int ret; struct stage2_map_data map_data = { .phys = ALIGN_DOWN(phys, PAGE_SIZE), .mmu = pgt->mmu, .memcache = mc, - .mm_ops = pgt->mm_ops, .force_pte = pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot), }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, - .flags = KVM_PGTABLE_WALK_TABLE_PRE | - KVM_PGTABLE_WALK_LEAF | - KVM_PGTABLE_WALK_TABLE_POST, + .flags = flags | + KVM_PGTABLE_WALK_TABLE_PRE | + KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; @@ -930,15 +940,13 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, .phys = KVM_PHYS_INVALID, .mmu = pgt->mmu, .memcache = mc, - .mm_ops = pgt->mm_ops, .owner_id = owner_id, .force_pte = true, }; struct kvm_pgtable_walker walker = { .cb = stage2_map_walker, .flags = KVM_PGTABLE_WALK_TABLE_PRE | - KVM_PGTABLE_WALK_LEAF | - KVM_PGTABLE_WALK_TABLE_POST, + KVM_PGTABLE_WALK_LEAF, .arg = &map_data, }; @@ -949,30 +957,29 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size, return ret; } -static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable *pgt = arg; + struct kvm_pgtable *pgt = ctx->arg; struct kvm_s2_mmu *mmu = pgt->mmu; - struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ptep, *childp = NULL; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; + kvm_pte_t *childp = NULL; bool need_flush = false; - if (!kvm_pte_valid(pte)) { - if (stage2_pte_is_counted(pte)) { - kvm_clear_pte(ptep); - mm_ops->put_page(ptep); + if (!kvm_pte_valid(ctx->old)) { + if (stage2_pte_is_counted(ctx->old)) { + kvm_clear_pte(ctx->ptep); + mm_ops->put_page(ctx->ptep); } return 0; } - if (kvm_pte_table(pte, level)) { - childp = kvm_pte_follow(pte, mm_ops); + if (kvm_pte_table(ctx->old, ctx->level)) { + childp = kvm_pte_follow(ctx->old, mm_ops); if (mm_ops->page_count(childp) != 1) return 0; - } else if (stage2_pte_cacheable(pgt, pte)) { + } else if (stage2_pte_cacheable(pgt, ctx->old)) { need_flush = !stage2_has_fwb(pgt); } @@ -981,11 +988,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * block entry and rely on the remaining portions being faulted * back lazily. */ - stage2_put_pte(ptep, mmu, addr, level, mm_ops); + stage2_put_pte(ctx, mmu, mm_ops); if (need_flush && mm_ops->dcache_clean_inval_poc) - mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), + kvm_granule_size(ctx->level)); if (childp) mm_ops->put_page(childp); @@ -1009,21 +1016,19 @@ struct stage2_attr_data { kvm_pte_t attr_clr; kvm_pte_t pte; u32 level; - struct kvm_pgtable_mm_ops *mm_ops; }; -static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - kvm_pte_t pte = *ptep; - struct stage2_attr_data *data = arg; - struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + kvm_pte_t pte = ctx->old; + struct stage2_attr_data *data = ctx->arg; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (!kvm_pte_valid(pte)) + if (!kvm_pte_valid(ctx->old)) return 0; - data->level = level; + data->level = ctx->level; data->pte = pte; pte &= ~data->attr_clr; pte |= data->attr_set; @@ -1039,10 +1044,12 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, * stage-2 PTE if we are going to add executable permission. */ if (mm_ops->icache_inval_pou && - stage2_pte_executable(pte) && !stage2_pte_executable(*ptep)) + stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old)) mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); - WRITE_ONCE(*ptep, pte); + kvm_granule_size(ctx->level)); + + if (!stage2_try_set_pte(ctx, pte)) + return -EAGAIN; } return 0; @@ -1051,19 +1058,18 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr, u64 size, kvm_pte_t attr_set, kvm_pte_t attr_clr, kvm_pte_t *orig_pte, - u32 *level) + u32 *level, enum kvm_pgtable_walk_flags flags) { int ret; kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI; struct stage2_attr_data data = { .attr_set = attr_set & attr_mask, .attr_clr = attr_clr & attr_mask, - .mm_ops = pgt->mm_ops, }; struct kvm_pgtable_walker walker = { .cb = stage2_attr_walker, .arg = &data, - .flags = KVM_PGTABLE_WALK_LEAF, + .flags = flags | KVM_PGTABLE_WALK_LEAF, }; ret = kvm_pgtable_walk(pgt, addr, size, &walker); @@ -1082,14 +1088,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size) { return stage2_update_leaf_attrs(pgt, addr, size, 0, KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W, - NULL, NULL); + NULL, NULL, 0); } kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0, - &pte, NULL); + &pte, NULL, 0); dsb(ishst); return pte; } @@ -1098,7 +1104,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF, - &pte, NULL); + &pte, NULL, 0); /* * "But where's the TLBI?!", you scream. * "Over in the core code", I sigh. @@ -1111,7 +1117,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr) bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr) { kvm_pte_t pte = 0; - stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL); + stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0); return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF; } @@ -1134,26 +1140,25 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, if (prot & KVM_PGTABLE_PROT_X) clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN; - ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level); + ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level, + KVM_PGTABLE_WALK_SHARED); if (!ret) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level); return ret; } -static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable *pgt = arg; + struct kvm_pgtable *pgt = ctx->arg; struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops; - kvm_pte_t pte = *ptep; - if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte)) + if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old)) return 0; if (mm_ops->dcache_clean_inval_poc) - mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops), - kvm_granule_size(level)); + mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops), + kvm_granule_size(ctx->level)); return 0; } @@ -1184,7 +1189,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; - pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz); + pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz); if (!pgt->pgd) return -ENOMEM; @@ -1200,20 +1205,27 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu, return 0; } -static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, - enum kvm_pgtable_walk_flags flag, - void * const arg) +size_t kvm_pgtable_stage2_pgd_size(u64 vtcr) +{ + u32 ia_bits = VTCR_EL2_IPA(vtcr); + u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr); + u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0; + + return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE; +} + +static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx, + enum kvm_pgtable_walk_flags visit) { - struct kvm_pgtable_mm_ops *mm_ops = arg; - kvm_pte_t pte = *ptep; + struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops; - if (!stage2_pte_is_counted(pte)) + if (!stage2_pte_is_counted(ctx->old)) return 0; - mm_ops->put_page(ptep); + mm_ops->put_page(ctx->ptep); - if (kvm_pte_table(pte, level)) - mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); + if (kvm_pte_table(ctx->old, ctx->level)) + mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops)); return 0; } @@ -1225,11 +1237,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt) .cb = stage2_free_walker, .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, - .arg = pgt->mm_ops, }; WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker)); pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE; - pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz); + pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz); pgt->pgd = NULL; } + +void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level) +{ + kvm_pteref_t ptep = (kvm_pteref_t)pgtable; + struct kvm_pgtable_walker walker = { + .cb = stage2_free_walker, + .flags = KVM_PGTABLE_WALK_LEAF | + KVM_PGTABLE_WALK_TABLE_POST, + }; + struct kvm_pgtable_walk_data data = { + .walker = &walker, + + /* + * At this point the IPA really doesn't matter, as the page + * table being traversed has already been removed from the stage + * 2. Set an appropriate range to cover the entire page table. + */ + .addr = 0, + .end = kvm_granule_size(level), + }; + + WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1)); +} diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile index 96bec0ecf9dd96c0a3f19e6bfedd4b8852b4eedd..3b9e5464b5b39ca07633751f2eea412c03d672f9 100644 --- a/arch/arm64/kvm/hyp/vhe/Makefile +++ b/arch/arm64/kvm/hyp/vhe/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 # -# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part +# Makefile for Kernel-based Virtual Machine module, HYP/VHE part # asflags-y := -D__KVM_VHE_HYPERVISOR__ diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h deleted file mode 100644 index 0d257de42c10997fbc147b5aa53a84b880f4b182..0000000000000000000000000000000000000000 --- a/arch/arm64/kvm/irq.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * irq.h: in kernel interrupt controller related definitions - * Copyright (c) 2016 Red Hat, Inc. - * - * This header is included by irqchip.c. However, on ARM, interrupt - * controller declarations are located in include/kvm/arm_vgic.h since - * they are mostly shared between arm and arm64. - */ - -#ifndef __IRQ_H -#define __IRQ_H - -#include - -#endif diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 60ee3d9f01f8c198b0dd90a35f32594e1cfff2d1..31d7fa4c7c140513f6b69034cafd5f23a8a88567 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -128,6 +128,25 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size) free_pages_exact(virt, size); } +static struct kvm_pgtable_mm_ops kvm_s2_mm_ops; + +static void stage2_free_removed_table_rcu_cb(struct rcu_head *head) +{ + struct page *page = container_of(head, struct page, rcu_head); + void *pgtable = page_to_virt(page); + u32 level = page_private(page); + + kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level); +} + +static void stage2_free_removed_table(void *addr, u32 level) +{ + struct page *page = virt_to_page(addr); + + set_page_private(page, (unsigned long)level); + call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb); +} + static void kvm_host_get_page(void *addr) { get_page(virt_to_page(addr)); @@ -640,8 +659,8 @@ static struct kvm_pgtable_mm_ops kvm_user_mm_ops = { static int get_user_mapping_size(struct kvm *kvm, u64 addr) { struct kvm_pgtable pgt = { - .pgd = (kvm_pte_t *)kvm->mm->pgd, - .ia_bits = VA_BITS, + .pgd = (kvm_pteref_t)kvm->mm->pgd, + .ia_bits = vabits_actual, .start_level = (KVM_PGTABLE_MAX_LEVELS - CONFIG_PGTABLE_LEVELS), .mm_ops = &kvm_user_mm_ops, @@ -662,6 +681,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { .zalloc_page = stage2_memcache_zalloc_page, .zalloc_pages_exact = kvm_s2_zalloc_pages_exact, .free_pages_exact = kvm_s2_free_pages_exact, + .free_removed_table = stage2_free_removed_table, .get_page = kvm_host_get_page, .put_page = kvm_s2_put_page, .page_count = kvm_host_page_count, @@ -675,15 +695,42 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = { * kvm_init_stage2_mmu - Initialise a S2 MMU structure * @kvm: The pointer to the KVM structure * @mmu: The pointer to the s2 MMU structure + * @type: The machine type of the virtual machine * * Allocates only the stage-2 HW PGD level table(s). * Note we don't need locking here as this is only called when the VM is * created, which can only be done once. */ -int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) +int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type) { + u32 kvm_ipa_limit = get_kvm_ipa_limit(); int cpu, err; struct kvm_pgtable *pgt; + u64 mmfr0, mmfr1; + u32 phys_shift; + + if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) + return -EINVAL; + + phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); + if (is_protected_kvm_enabled()) { + phys_shift = kvm_ipa_limit; + } else if (phys_shift) { + if (phys_shift > kvm_ipa_limit || + phys_shift < ARM64_MIN_PARANGE_BITS) + return -EINVAL; + } else { + phys_shift = KVM_PHYS_SHIFT; + if (phys_shift > kvm_ipa_limit) { + pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", + current->comm); + return -EINVAL; + } + } + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); if (mmu->pgt != NULL) { kvm_err("kvm_arch already initialized?\n"); @@ -807,6 +854,32 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu) } } +static void hyp_mc_free_fn(void *addr, void *unused) +{ + free_page((unsigned long)addr); +} + +static void *hyp_mc_alloc_fn(void *unused) +{ + return (void *)__get_free_page(GFP_KERNEL_ACCOUNT); +} + +void free_hyp_memcache(struct kvm_hyp_memcache *mc) +{ + if (is_protected_kvm_enabled()) + __free_hyp_memcache(mc, hyp_mc_free_fn, + kvm_host_va, NULL); +} + +int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages) +{ + if (!is_protected_kvm_enabled()) + return 0; + + return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn, + kvm_host_pa, NULL); +} + /** * kvm_phys_addr_ioremap - map a device range to guest IPA * @@ -841,7 +914,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, write_lock(&kvm->mmu_lock); ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot, - &cache); + &cache, 0); write_unlock(&kvm->mmu_lock); if (ret) break; @@ -1091,32 +1164,26 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva) * - mmap_lock protects between a VM faulting a page in and the VMM performing * an mprotect() to add VM_MTE */ -static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, - unsigned long size) +static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, + unsigned long size) { unsigned long i, nr_pages = size >> PAGE_SHIFT; - struct page *page; + struct page *page = pfn_to_page(pfn); if (!kvm_has_mte(kvm)) - return 0; - - /* - * pfn_to_online_page() is used to reject ZONE_DEVICE pages - * that may not support tags. - */ - page = pfn_to_online_page(pfn); - - if (!page) - return -EFAULT; + return; for (i = 0; i < nr_pages; i++, page++) { - if (!test_bit(PG_mte_tagged, &page->flags)) { + if (try_page_mte_tagging(page)) { mte_clear_page_tags(page_address(page)); - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); } } +} - return 0; +static bool kvm_vma_mte_allowed(struct vm_area_struct *vma) +{ + return vma->vm_flags & VM_MTE_ALLOWED; } static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, @@ -1127,7 +1194,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, bool write_fault, writable, force_pte = false; bool exec_fault; bool device = false; - bool shared; unsigned long mmu_seq; struct kvm *kvm = vcpu->kvm; struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; @@ -1136,7 +1202,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn_t gfn; kvm_pfn_t pfn; bool logging_active = memslot_is_logging(memslot); - bool use_read_lock = false; unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu); unsigned long vma_pagesize, fault_granule; enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R; @@ -1171,14 +1236,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (logging_active) { force_pte = true; vma_shift = PAGE_SHIFT; - use_read_lock = (fault_status == FSC_PERM && write_fault && - fault_granule == PAGE_SIZE); } else { vma_shift = get_vma_page_shift(vma, hva); } - shared = (vma->vm_flags & VM_SHARED); - switch (vma_shift) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SHIFT: @@ -1239,7 +1300,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, */ smp_rmb(); - pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, write_fault, &writable, NULL); if (pfn == KVM_PFN_ERR_HWPOISON) { kvm_send_hwpoison_signal(hva, vma_shift); @@ -1271,15 +1332,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, if (exec_fault && device) return -ENOEXEC; - /* - * To reduce MMU contentions and enhance concurrency during dirty - * logging dirty logging, only acquire read lock for permission - * relaxation. - */ - if (use_read_lock) - read_lock(&kvm->mmu_lock); - else - write_lock(&kvm->mmu_lock); + read_lock(&kvm->mmu_lock); pgt = vcpu->arch.hw_mmu->pgt; if (mmu_invalidate_retry(kvm, mmu_seq)) goto out_unlock; @@ -1298,13 +1351,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) { - /* Check the VMM hasn't introduced a new VM_SHARED VMA */ - if (!shared) - ret = sanitise_mte_tags(kvm, pfn, vma_pagesize); - else + /* Check the VMM hasn't introduced a new disallowed VMA */ + if (kvm_vma_mte_allowed(vma)) { + sanitise_mte_tags(kvm, pfn, vma_pagesize); + } else { ret = -EFAULT; - if (ret) goto out_unlock; + } } if (writable) @@ -1323,15 +1376,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, * permissions only if vma_pagesize equals fault_granule. Otherwise, * kvm_pgtable_stage2_map() should be called to change block size. */ - if (fault_status == FSC_PERM && vma_pagesize == fault_granule) { + if (fault_status == FSC_PERM && vma_pagesize == fault_granule) ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot); - } else { - WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n"); - + else ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize, __pfn_to_phys(pfn), prot, - memcache); - } + memcache, KVM_PGTABLE_WALK_SHARED); /* Mark the page dirty only if the fault is handled successfully */ if (writable && !ret) { @@ -1340,10 +1390,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, } out_unlock: - if (use_read_lock) - read_unlock(&kvm->mmu_lock); - else - write_unlock(&kvm->mmu_lock); + read_unlock(&kvm->mmu_lock); kvm_set_pfn_accessed(pfn); kvm_release_pfn_clean(pfn); return ret != -EAGAIN ? ret : 0; @@ -1526,15 +1573,18 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { kvm_pfn_t pfn = pte_pfn(range->pte); - int ret; if (!kvm->arch.mmu.pgt) return false; WARN_ON(range->end - range->start != 1); - ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE); - if (ret) + /* + * If the page isn't tagged, defer to user_mem_abort() for sanitising + * the MTE tags. The S2 pte should have been unmapped by + * mmu_notifier_invalidate_range_end(). + */ + if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn))) return false; /* @@ -1549,7 +1599,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) */ kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT, PAGE_SIZE, __pfn_to_phys(pfn), - KVM_PGTABLE_PROT_R, NULL); + KVM_PGTABLE_PROT_R, NULL, 0); return false; } @@ -1618,6 +1668,8 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = { int kvm_mmu_init(u32 *hyp_va_bits) { int err; + u32 idmap_bits; + u32 kernel_bits; hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); @@ -1631,7 +1683,31 @@ int kvm_mmu_init(u32 *hyp_va_bits) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - *hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + /* + * The ID map may be configured to use an extended virtual address + * range. This is only the case if system RAM is out of range for the + * currently configured page size and VA_BITS_MIN, in which case we will + * also need the extended virtual range for the HYP ID map, or we won't + * be able to enable the EL2 MMU. + * + * However, in some cases the ID map may be configured for fewer than + * the number of VA bits used by the regular kernel stage 1. This + * happens when VA_BITS=52 and the kernel image is placed in PA space + * below 48 bits. + * + * At EL2, there is only one TTBR register, and we can't switch between + * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom + * line: we need to use the extended range with *both* our translation + * tables. + * + * So use the maximum of the idmap VA bits and the regular kernel stage + * 1 VA bits to assure that the hypervisor can both ID map its code page + * and map any kernel memory. + */ + idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET); + kernel_bits = vabits_actual; + *hyp_va_bits = max(idmap_bits, kernel_bits); + kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits); kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); kvm_debug("HYP VA range: %lx:%lx\n", @@ -1740,12 +1816,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (!vma) break; - /* - * VM_SHARED mappings are not allowed with MTE to avoid races - * when updating the PG_mte_tagged page flag, see - * sanitise_mte_tags for more details. - */ - if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) { + if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) { ret = -EINVAL; break; } diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c index ebecb7c045f4366ebbab3035ec696ce901523728..cf56958b1492a486f7e549c95d1f5ec828349a6b 100644 --- a/arch/arm64/kvm/pkvm.c +++ b/arch/arm64/kvm/pkvm.c @@ -6,6 +6,7 @@ #include #include +#include #include #include @@ -53,7 +54,7 @@ static int __init register_memblock_regions(void) void __init kvm_hyp_reserve(void) { - u64 nr_pages, prev, hyp_mem_pages = 0; + u64 hyp_mem_pages = 0; int ret; if (!is_hyp_mode_available() || is_kernel_in_hyp_mode()) @@ -71,21 +72,8 @@ void __init kvm_hyp_reserve(void) hyp_mem_pages += hyp_s1_pgtable_pages(); hyp_mem_pages += host_s2_pgtable_pages(); - - /* - * The hyp_vmemmap needs to be backed by pages, but these pages - * themselves need to be present in the vmemmap, so compute the number - * of pages needed by looking for a fixed point. - */ - nr_pages = 0; - do { - prev = nr_pages; - nr_pages = hyp_mem_pages + prev; - nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE, - PAGE_SIZE); - nr_pages += __hyp_pgtable_max_pages(nr_pages); - } while (nr_pages != prev); - hyp_mem_pages += nr_pages; + hyp_mem_pages += hyp_vm_table_pages(); + hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE); /* * Try to allocate a PMD-aligned region to reduce TLB pressure once @@ -107,3 +95,121 @@ void __init kvm_hyp_reserve(void) kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20, hyp_mem_base); } + +/* + * Allocates and donates memory for hypervisor VM structs at EL2. + * + * Allocates space for the VM state, which includes the hyp vm as well as + * the hyp vcpus. + * + * Stores an opaque handler in the kvm struct for future reference. + * + * Return 0 on success, negative error code on failure. + */ +static int __pkvm_create_hyp_vm(struct kvm *host_kvm) +{ + size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz; + struct kvm_vcpu *host_vcpu; + pkvm_handle_t handle; + void *pgd, *hyp_vm; + unsigned long idx; + int ret; + + if (host_kvm->created_vcpus < 1) + return -EINVAL; + + pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr); + + /* + * The PGD pages will be reclaimed using a hyp_memcache which implies + * page granularity. So, use alloc_pages_exact() to get individual + * refcounts. + */ + pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT); + if (!pgd) + return -ENOMEM; + + /* Allocate memory to donate to hyp for vm and vcpu pointers. */ + hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE, + size_mul(sizeof(void *), + host_kvm->created_vcpus))); + hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT); + if (!hyp_vm) { + ret = -ENOMEM; + goto free_pgd; + } + + /* Donate the VM memory to hyp and let hyp initialize it. */ + ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd); + if (ret < 0) + goto free_vm; + + handle = ret; + + host_kvm->arch.pkvm.handle = handle; + + /* Donate memory for the vcpus at hyp and initialize it. */ + hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE); + kvm_for_each_vcpu(idx, host_vcpu, host_kvm) { + void *hyp_vcpu; + + /* Indexing of the vcpus to be sequential starting at 0. */ + if (WARN_ON(host_vcpu->vcpu_idx != idx)) { + ret = -EINVAL; + goto destroy_vm; + } + + hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT); + if (!hyp_vcpu) { + ret = -ENOMEM; + goto destroy_vm; + } + + ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu, + hyp_vcpu); + if (ret) { + free_pages_exact(hyp_vcpu, hyp_vcpu_sz); + goto destroy_vm; + } + } + + return 0; + +destroy_vm: + pkvm_destroy_hyp_vm(host_kvm); + return ret; +free_vm: + free_pages_exact(hyp_vm, hyp_vm_sz); +free_pgd: + free_pages_exact(pgd, pgd_sz); + return ret; +} + +int pkvm_create_hyp_vm(struct kvm *host_kvm) +{ + int ret = 0; + + mutex_lock(&host_kvm->lock); + if (!host_kvm->arch.pkvm.handle) + ret = __pkvm_create_hyp_vm(host_kvm); + mutex_unlock(&host_kvm->lock); + + return ret; +} + +void pkvm_destroy_hyp_vm(struct kvm *host_kvm) +{ + if (host_kvm->arch.pkvm.handle) { + WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm, + host_kvm->arch.pkvm.handle)); + } + + host_kvm->arch.pkvm.handle = 0; + free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc); +} + +int pkvm_init_host_vm(struct kvm *host_kvm) +{ + mutex_init(&host_kvm->lock); + return 0; +} diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 0003c7d37533a87ea5be34414b812996cf094fc8..24908400e190616f317b9e6725b4605c12d1c8a4 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -15,16 +15,25 @@ #include #include +#define PERF_ATTR_CFG1_COUNTER_64BIT BIT(0) + DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available); static LIST_HEAD(arm_pmus); static DEFINE_MUTEX(arm_pmus_lock); -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc); +static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc); + +static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc) +{ + return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]); +} -#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 +static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx) +{ + return &vcpu->arch.pmu.pmc[cnt_idx]; +} static u32 kvm_pmu_event_mask(struct kvm *kvm) { @@ -47,113 +56,46 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm) } /** - * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter - * @vcpu: The vcpu pointer - * @select_idx: The counter index + * kvm_pmc_is_64bit - determine if counter is 64bit + * @pmc: counter context */ -static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc) { - return (select_idx == ARMV8_PMU_CYCLE_IDX && - __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); + return (pmc->idx == ARMV8_PMU_CYCLE_IDX || + kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc))); } -static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) +static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc) { - struct kvm_pmu *pmu; - struct kvm_vcpu_arch *vcpu_arch; + u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0); - pmc -= pmc->idx; - pmu = container_of(pmc, struct kvm_pmu, pmc[0]); - vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); - return container_of(vcpu_arch, struct kvm_vcpu, arch); + return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) || + (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC)); } -/** - * kvm_pmu_pmc_is_chained - determine if the pmc is chained - * @pmc: The PMU counter pointer - */ -static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) +static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc) { - struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); - - return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX && + !kvm_pmc_has_64bit_overflow(pmc)); } -/** - * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter - * @select_idx: The counter index - */ -static bool kvm_pmu_idx_is_high_counter(u64 select_idx) -{ - return select_idx & 0x1; -} - -/** - * kvm_pmu_get_canonical_pmc - obtain the canonical pmc - * @pmc: The PMU counter pointer - * - * When a pair of PMCs are chained together we use the low counter (canonical) - * to hold the underlying perf event. - */ -static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) -{ - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - - return pmc; -} -static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) +static u32 counter_index_to_reg(u64 idx) { - if (kvm_pmu_idx_is_high_counter(pmc->idx)) - return pmc - 1; - else - return pmc + 1; + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx; } -/** - * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain - * @vcpu: The vcpu pointer - * @select_idx: The counter index - */ -static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) +static u32 counter_index_to_evtreg(u64 idx) { - u64 eventsel, reg; - - select_idx |= 0x1; - - if (select_idx == ARMV8_PMU_CYCLE_IDX) - return false; - - reg = PMEVTYPER0_EL0 + select_idx; - eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm); - - return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; + return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx; } -/** - * kvm_pmu_get_pair_counter_value - get PMU counter value - * @vcpu: The vcpu pointer - * @pmc: The PMU counter pointer - */ -static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, - struct kvm_pmc *pmc) +static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc) { - u64 counter, counter_high, reg, enabled, running; - - if (kvm_pmu_pmc_is_chained(pmc)) { - pmc = kvm_pmu_get_canonical_pmc(pmc); - reg = PMEVCNTR0_EL0 + pmc->idx; + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + u64 counter, reg, enabled, running; - counter = __vcpu_sys_reg(vcpu, reg); - counter_high = __vcpu_sys_reg(vcpu, reg + 1); - - counter = lower_32_bits(counter) | (counter_high << 32); - } else { - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; - counter = __vcpu_sys_reg(vcpu, reg); - } + reg = counter_index_to_reg(pmc->idx); + counter = __vcpu_sys_reg(vcpu, reg); /* * The real counter value is equal to the value of counter register plus @@ -163,6 +105,9 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, counter += perf_event_read_value(pmc->perf_event, &enabled, &running); + if (!kvm_pmc_is_64bit(pmc)) + counter = lower_32_bits(counter); + return counter; } @@ -173,22 +118,37 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, */ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) { - u64 counter; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx]; - if (!kvm_vcpu_has_pmu(vcpu)) return 0; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx)); +} - if (kvm_pmu_pmc_is_chained(pmc) && - kvm_pmu_idx_is_high_counter(select_idx)) - counter = upper_32_bits(counter); - else if (select_idx != ARMV8_PMU_CYCLE_IDX) - counter = lower_32_bits(counter); +static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force) +{ + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + u64 reg; - return counter; + kvm_pmu_release_perf_event(pmc); + + reg = counter_index_to_reg(pmc->idx); + + if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX && + !force) { + /* + * Even with PMUv3p5, AArch32 cannot write to the top + * 32bit of the counters. The only possible course of + * action is to use PMCR.P, which will reset them to + * 0 (the only use of the 'force' parameter). + */ + val = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32); + val |= lower_32_bits(val); + } + + __vcpu_sys_reg(vcpu, reg) = val; + + /* Recreate the perf event to reflect the updated sample_period */ + kvm_pmu_create_perf_event(pmc); } /** @@ -199,17 +159,10 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) */ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) { - u64 reg; - if (!kvm_vcpu_has_pmu(vcpu)) return; - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; - __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); - - /* Recreate the perf event to reflect the updated sample_period */ - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false); } /** @@ -218,7 +171,6 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) */ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) { - pmc = kvm_pmu_get_canonical_pmc(pmc); if (pmc->perf_event) { perf_event_disable(pmc->perf_event); perf_event_release_kernel(pmc->perf_event); @@ -232,29 +184,20 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) * * If this counter has been configured to monitor some event, release it here. */ -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) +static void kvm_pmu_stop_counter(struct kvm_pmc *pmc) { - u64 counter, reg, val; + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + u64 reg, val; - pmc = kvm_pmu_get_canonical_pmc(pmc); if (!pmc->perf_event) return; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + val = kvm_pmu_get_pmc_value(pmc); - if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { - reg = PMCCNTR_EL0; - val = counter; - } else { - reg = PMEVCNTR0_EL0 + pmc->idx; - val = lower_32_bits(counter); - } + reg = counter_index_to_reg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = val; - if (kvm_pmu_pmc_is_chained(pmc)) - __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); - kvm_pmu_release_perf_event(pmc); } @@ -280,13 +223,10 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) { unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); - struct kvm_pmu *pmu = &vcpu->arch.pmu; int i; for_each_set_bit(i, &mask, 32) - kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); - - bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); + kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i)); } /** @@ -297,10 +237,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) - kvm_pmu_release_perf_event(&pmu->pmc[i]); + kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i)); irq_work_sync(&vcpu->arch.pmu.overflow_work); } @@ -325,9 +264,6 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; - if (!kvm_vcpu_has_pmu(vcpu)) return; @@ -335,17 +271,16 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; - - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); - /* At this point, pmc must be the canonical */ - if (pmc->perf_event) { + if (!pmc->perf_event) { + kvm_pmu_create_perf_event(pmc); + } else { perf_event_enable(pmc->perf_event); if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) kvm_debug("fail to enable perf event\n"); @@ -363,23 +298,18 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) { int i; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; if (!kvm_vcpu_has_pmu(vcpu) || !val) return; for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + struct kvm_pmc *pmc; + if (!(val & BIT(i))) continue; - pmc = &pmu->pmc[i]; - - /* A change in the enable state may affect the chain state */ - kvm_pmu_update_pmc_chained(vcpu, i); - kvm_pmu_create_perf_event(vcpu, i); + pmc = kvm_vcpu_idx_to_pmc(vcpu, i); - /* At this point, pmc must be the canonical */ if (pmc->perf_event) perf_event_disable(pmc->perf_event); } @@ -476,14 +406,69 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work) { struct kvm_vcpu *vcpu; - struct kvm_pmu *pmu; - - pmu = container_of(work, struct kvm_pmu, overflow_work); - vcpu = kvm_pmc_to_vcpu(pmu->pmc); + vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work); kvm_vcpu_kick(vcpu); } +/* + * Perform an increment on any of the counters described in @mask, + * generating the overflow if required, and propagate it as a chained + * event if possible. + */ +static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu, + unsigned long mask, u32 event) +{ + int i; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + return; + + /* Weed out disabled counters */ + mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + + for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i); + u64 type, reg; + + /* Filter on event type */ + type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i)); + type &= kvm_pmu_event_mask(vcpu->kvm); + if (type != event) + continue; + + /* Increment this counter */ + reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1; + if (!kvm_pmc_is_64bit(pmc)) + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg; + + /* No overflow? move on */ + if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg)) + continue; + + /* Mark overflow */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + + if (kvm_pmu_counter_can_chain(pmc)) + kvm_pmu_counter_increment(vcpu, BIT(i + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + } +} + +/* Compute the sample period for a given counter value */ +static u64 compute_period(struct kvm_pmc *pmc, u64 counter) +{ + u64 val; + + if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc)) + val = (-counter) & GENMASK(63, 0); + else + val = (-counter) & GENMASK(31, 0); + + return val; +} + /** * When the perf event overflows, set the overflow status and inform the vcpu. */ @@ -503,10 +488,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, * Reset the sample period to the architectural limit, * i.e. the point where the counter overflows. */ - period = -(local64_read(&perf_event->count)); - - if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - period &= GENMASK(31, 0); + period = compute_period(pmc, local64_read(&perf_event->count)); local64_set(&perf_event->hw.period_left, 0); perf_event->attr.sample_period = period; @@ -514,6 +496,10 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + if (kvm_pmu_counter_can_chain(pmc)) + kvm_pmu_counter_increment(vcpu, BIT(idx + 1), + ARMV8_PMUV3_PERFCTR_CHAIN); + if (kvm_pmu_overflow_status(vcpu)) { kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); @@ -533,50 +519,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event, */ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - int i; - - if (!kvm_vcpu_has_pmu(vcpu)) - return; - - if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) - return; - - /* Weed out disabled counters */ - val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); - - for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { - u64 type, reg; - - if (!(val & BIT(i))) - continue; - - /* PMSWINC only applies to ... SW_INC! */ - type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); - type &= kvm_pmu_event_mask(vcpu->kvm); - if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) - continue; - - /* increment this even SW_INC counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; - - if (reg) /* no overflow on the low part */ - continue; - - if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { - /* increment the high counter */ - reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; - reg = lower_32_bits(reg); - __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; - if (!reg) /* mark overflow on the high counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); - } else { - /* mark overflow on low counter */ - __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); - } - } + kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR); } /** @@ -591,6 +534,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) if (!kvm_vcpu_has_pmu(vcpu)) return; + /* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */ + if (!kvm_pmu_is_3p5(vcpu)) + val &= ~ARMV8_PMU_PMCR_LP; + + __vcpu_sys_reg(vcpu, PMCR_EL0) = val; + if (val & ARMV8_PMU_PMCR_E) { kvm_pmu_enable_counter_mask(vcpu, __vcpu_sys_reg(vcpu, PMCNTENSET_EL0)); @@ -606,49 +555,44 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); mask &= ~BIT(ARMV8_PMU_CYCLE_IDX); for_each_set_bit(i, &mask, 32) - kvm_pmu_set_counter_value(vcpu, i, 0); + kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true); } } -static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) +static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && - (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx)); } /** * kvm_pmu_create_perf_event - create a perf event for a counter - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter + * @pmc: Counter context */ -static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) +static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc) { + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu; - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc; struct perf_event *event; struct perf_event_attr attr; - u64 eventsel, counter, reg, data; + u64 eventsel, reg, data; - /* - * For chained counters the event type and filtering attributes are - * obtained from the low/even counter. We also use this counter to - * determine if the event is enabled/disabled. - */ - pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); - - reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; + reg = counter_index_to_evtreg(pmc->idx); data = __vcpu_sys_reg(vcpu, reg); - kvm_pmu_stop_counter(vcpu, pmc); + kvm_pmu_stop_counter(pmc); if (pmc->idx == ARMV8_PMU_CYCLE_IDX) eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; else eventsel = data & kvm_pmu_event_mask(vcpu->kvm); - /* Software increment event doesn't need to be backed by a perf event */ - if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR) + /* + * Neither SW increment nor chained events need to be backed + * by a perf event. + */ + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR || + eventsel == ARMV8_PMUV3_PERFCTR_CHAIN) return; /* @@ -663,37 +607,25 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) attr.type = arm_pmu->pmu.type; attr.size = sizeof(attr); attr.pinned = 1; - attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); + attr.disabled = !kvm_pmu_counter_is_enabled(pmc); attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; attr.exclude_hv = 1; /* Don't count EL2 events */ attr.exclude_host = 1; /* Don't count host events */ attr.config = eventsel; - counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); - - if (kvm_pmu_pmc_is_chained(pmc)) { - /** - * The initial sample period (overflow count) of an event. For - * chained counters we only support overflow interrupts on the - * high counter. - */ - attr.sample_period = (-counter) & GENMASK(63, 0); - attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + /* + * If counting with a 64bit counter, advertise it to the perf + * code, carefully dealing with the initial sample period + * which also depends on the overflow. + */ + if (kvm_pmc_is_64bit(pmc)) + attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT; - event = perf_event_create_kernel_counter(&attr, -1, current, - kvm_pmu_perf_overflow, - pmc + 1); - } else { - /* The initial sample period (overflow count) of an event. */ - if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) - attr.sample_period = (-counter) & GENMASK(63, 0); - else - attr.sample_period = (-counter) & GENMASK(31, 0); + attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc)); - event = perf_event_create_kernel_counter(&attr, -1, current, + event = perf_event_create_kernel_counter(&attr, -1, current, kvm_pmu_perf_overflow, pmc); - } if (IS_ERR(event)) { pr_err_once("kvm: pmu event creation failed %ld\n", @@ -704,41 +636,6 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) pmc->perf_event = event; } -/** - * kvm_pmu_update_pmc_chained - update chained bitmap - * @vcpu: The vcpu pointer - * @select_idx: The number of selected counter - * - * Update the chained bitmap based on the event type written in the - * typer register and the enable state of the odd register. - */ -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) -{ - struct kvm_pmu *pmu = &vcpu->arch.pmu; - struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; - bool new_state, old_state; - - old_state = kvm_pmu_pmc_is_chained(pmc); - new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && - kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); - - if (old_state == new_state) - return; - - canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); - kvm_pmu_stop_counter(vcpu, canonical_pmc); - if (new_state) { - /* - * During promotion from !chained to chained we must ensure - * the adjacent counter is stopped and its event destroyed - */ - kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); - set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); - return; - } - clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); -} - /** * kvm_pmu_set_counter_event_type - set selected counter to monitor some event * @vcpu: The vcpu pointer @@ -752,6 +649,7 @@ static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, u64 select_idx) { + struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx); u64 reg, mask; if (!kvm_vcpu_has_pmu(vcpu)) @@ -761,20 +659,19 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, mask &= ~ARMV8_PMU_EVTYPE_EVENT; mask |= kvm_pmu_event_mask(vcpu->kvm); - reg = (select_idx == ARMV8_PMU_CYCLE_IDX) - ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + reg = counter_index_to_evtreg(pmc->idx); __vcpu_sys_reg(vcpu, reg) = data & mask; - kvm_pmu_update_pmc_chained(vcpu, select_idx); - kvm_pmu_create_perf_event(vcpu, select_idx); + kvm_pmu_create_perf_event(pmc); } void kvm_host_pmu_init(struct arm_pmu *pmu) { struct arm_pmu_entry *entry; - if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || + pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) return; mutex_lock(&arm_pmus_lock); @@ -827,7 +724,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void) if (event->pmu) { pmu = to_arm_pmu(event->pmu); - if (pmu->pmuver == 0 || + if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF) pmu = NULL; } @@ -849,6 +746,8 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) if (!pmceid1) { val = read_sysreg(pmceid0_el0); + /* always support CHAIN */ + val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN); base = 0; } else { val = read_sysreg(pmceid1_el0); @@ -1150,3 +1049,14 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -ENXIO; } + +u8 kvm_arm_pmu_get_pmuver_limit(void) +{ + u64 tmp; + + tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1); + tmp = cpuid_feature_cap_perfmon_field(tmp, + ID_AA64DFR0_EL1_PMUVer_SHIFT, + ID_AA64DFR0_EL1_PMUVer_V3P5); + return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp); +} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ae18472205a9f34b73eb087b1ba383edee46ad0..e0267f672b8abec1e72bdf80c7d1abb37518ad33 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -395,32 +395,3 @@ int kvm_set_ipa_limit(void) return 0; } - -int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) -{ - u64 mmfr0, mmfr1; - u32 phys_shift; - - if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK) - return -EINVAL; - - phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type); - if (phys_shift) { - if (phys_shift > kvm_ipa_limit || - phys_shift < ARM64_MIN_PARANGE_BITS) - return -EINVAL; - } else { - phys_shift = KVM_PHYS_SHIFT; - if (phys_shift > kvm_ipa_limit) { - pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n", - current->comm); - return -EINVAL; - } - } - - mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); - mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift); - - return 0; -} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 608e4f25161d3c162feb13bc49abcbbea81fc6b1..d5ee52d6bf7326f9254298631ef2fc0824c0e176 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -639,22 +639,18 @@ static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { - u64 pmcr, val; + u64 pmcr; /* No PMU available, PMCR_EL0 may UNDEF... */ if (!kvm_arm_support_pmu_v3()) return; - pmcr = read_sysreg(pmcr_el0); - /* - * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN - * except PMCR.E resetting to zero. - */ - val = ((pmcr & ~ARMV8_PMU_PMCR_MASK) - | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E); + /* Only preserve PMCR_EL0.N, and reset the rest to 0 */ + pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK; if (!kvm_supports_32bit_el0()) - val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, r->reg) = val; + pmcr |= ARMV8_PMU_PMCR_LC; + + __vcpu_sys_reg(vcpu, r->reg) = pmcr; } static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags) @@ -697,13 +693,15 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return false; if (p->is_write) { - /* Only update writeable bits of PMCR */ + /* + * Only update writeable bits of PMCR (continuing into + * kvm_pmu_handle_pmcr() as well) + */ val = __vcpu_sys_reg(vcpu, PMCR_EL0); val &= ~ARMV8_PMU_PMCR_MASK; val |= p->regval & ARMV8_PMU_PMCR_MASK; if (!kvm_supports_32bit_el0()) val |= ARMV8_PMU_PMCR_LC; - __vcpu_sys_reg(vcpu, PMCR_EL0) = val; kvm_pmu_handle_pmcr(vcpu, val); kvm_vcpu_pmu_restore_guest(vcpu); } else { @@ -1062,6 +1060,40 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu, return true; } +static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu) +{ + if (kvm_vcpu_has_pmu(vcpu)) + return vcpu->kvm->arch.dfr0_pmuver.imp; + + return vcpu->kvm->arch.dfr0_pmuver.unimp; +} + +static u8 perfmon_to_pmuver(u8 perfmon) +{ + switch (perfmon) { + case ID_DFR0_EL1_PerfMon_PMUv3: + return ID_AA64DFR0_EL1_PMUVer_IMP; + case ID_DFR0_EL1_PerfMon_IMPDEF: + return ID_AA64DFR0_EL1_PMUVer_IMP_DEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. */ + return perfmon; + } +} + +static u8 pmuver_to_perfmon(u8 pmuver) +{ + switch (pmuver) { + case ID_AA64DFR0_EL1_PMUVer_IMP: + return ID_DFR0_EL1_PerfMon_PMUv3; + case ID_AA64DFR0_EL1_PMUVer_IMP_DEF: + return ID_DFR0_EL1_PerfMon_IMPDEF; + default: + /* Anything ARMv8.1+ and NI have the same value. For now. */ + return pmuver; + } +} + /* Read a sanitised cpufeature ID register by sys_reg_desc */ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r) { @@ -1111,18 +1143,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6); - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_AA64DFR0_EL1_PMUVer_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0); + /* Set PMUver to the required version */ + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), + vcpu_pmuver(vcpu)); /* Hide SPE from guests */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer); break; case SYS_ID_DFR0_EL1: - /* Limit guests to PMUv3 for ARMv8.4 */ - val = cpuid_feature_cap_perfmon_field(val, - ID_DFR0_EL1_PerfMon_SHIFT, - kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_EL1_PerfMon_PMUv3p4 : 0); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), + pmuver_to_perfmon(vcpu_pmuver(vcpu))); break; } @@ -1222,6 +1253,85 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu, return 0; } +static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 pmuver, host_pmuver; + bool valid_pmu; + + host_pmuver = kvm_arm_pmu_get_pmuver_limit(); + + /* + * Allow AA64DFR0_EL1.PMUver to be set from userspace as long + * as it doesn't promise more than what the HW gives us. We + * allow an IMPDEF PMU though, only if no PMU is supported + * (KVM backward compatibility handling). + */ + pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), val); + if ((pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver > host_pmuver)) + return -EINVAL; + + valid_pmu = (pmuver != 0 && pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PMUver, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = pmuver; + else + vcpu->kvm->arch.dfr0_pmuver.unimp = pmuver; + + return 0; +} + +static int set_id_dfr0_el1(struct kvm_vcpu *vcpu, + const struct sys_reg_desc *rd, + u64 val) +{ + u8 perfmon, host_perfmon; + bool valid_pmu; + + host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit()); + + /* + * Allow DFR0_EL1.PerfMon to be set from userspace as long as + * it doesn't promise more than what the HW gives us on the + * AArch64 side (as everything is emulated with that), and + * that this is a PMUv3. + */ + perfmon = FIELD_GET(ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon), val); + if ((perfmon != ID_DFR0_EL1_PerfMon_IMPDEF && perfmon > host_perfmon) || + (perfmon != 0 && perfmon < ID_DFR0_EL1_PerfMon_PMUv3)) + return -EINVAL; + + valid_pmu = (perfmon != 0 && perfmon != ID_DFR0_EL1_PerfMon_IMPDEF); + + /* Make sure view register and PMU support do match */ + if (kvm_vcpu_has_pmu(vcpu) != valid_pmu) + return -EINVAL; + + /* We can only differ with PerfMon, and anything else is an error */ + val ^= read_id_reg(vcpu, rd); + val &= ~ARM64_FEATURE_MASK(ID_DFR0_EL1_PerfMon); + if (val) + return -EINVAL; + + if (valid_pmu) + vcpu->kvm->arch.dfr0_pmuver.imp = perfmon_to_pmuver(perfmon); + else + vcpu->kvm->arch.dfr0_pmuver.unimp = perfmon_to_pmuver(perfmon); + + return 0; +} + /* * cpufeature ID register user accessors * @@ -1443,7 +1553,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=1 */ AA32_ID_SANITISED(ID_PFR0_EL1), AA32_ID_SANITISED(ID_PFR1_EL1), - AA32_ID_SANITISED(ID_DFR0_EL1), + { SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_dfr0_el1, + .visibility = aa32_id_visibility, }, ID_HIDDEN(ID_AFR0_EL1), AA32_ID_SANITISED(ID_MMFR0_EL1), AA32_ID_SANITISED(ID_MMFR1_EL1), @@ -1483,7 +1595,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_UNALLOCATED(4,7), /* CRm=5 */ - ID_SANITISED(ID_AA64DFR0_EL1), + { SYS_DESC(SYS_ID_AA64DFR0_EL1), .access = access_id_reg, + .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, }, ID_SANITISED(ID_AA64DFR1_EL1), ID_UNALLOCATED(5,2), ID_UNALLOCATED(5,3), diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 733b53055f9760fe01d22c3d28006c3a7cefae65..94a666dd1443d1310aadf35a1267e26d04709eca 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2743,6 +2743,7 @@ static int vgic_its_has_attr(struct kvm_device *dev, static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) { const struct vgic_its_abi *abi = vgic_its_get_abi(its); + struct vgic_dist *dist = &kvm->arch.vgic; int ret = 0; if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ @@ -2762,7 +2763,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) vgic_its_reset(kvm, its); break; case KVM_DEV_ARM_ITS_SAVE_TABLES: + dist->save_its_tables_in_progress = true; ret = abi->save_tables(its); + dist->save_its_tables_in_progress = false; break; case KVM_DEV_ARM_ITS_RESTORE_TABLES: ret = abi->restore_tables(its); @@ -2775,6 +2778,23 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) return ret; } +/* + * kvm_arch_allow_write_without_running_vcpu - allow writing guest memory + * without the running VCPU when dirty ring is enabled. + * + * The running VCPU is required to track dirty guest pages when dirty ring + * is enabled. Otherwise, the backup bitmap should be used to track the + * dirty guest pages. When vgic/its tables are being saved, the backup + * bitmap is used to track the dirty guest pages due to the missed running + * VCPU in the period. + */ +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + return dist->save_its_tables_in_progress; +} + static int vgic_its_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c index 24913271e898c121a9083610144ec640804f02c0..8dd5a8fe64b4f6688889ace395aedc83a735ae9f 100644 --- a/arch/arm64/mm/copypage.c +++ b/arch/arm64/mm/copypage.c @@ -21,9 +21,12 @@ void copy_highpage(struct page *to, struct page *from) copy_page(kto, kfrom); - if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { - set_bit(PG_mte_tagged, &to->flags); + if (system_supports_mte() && page_mte_tagged(from)) { + page_kasan_tag_reset(to); + /* It's a new page, shouldn't have been tagged yet */ + WARN_ON_ONCE(!try_page_mte_tagging(to)); mte_copy_page_tags(kto, kfrom); + set_page_mte_tagged(to); } } EXPORT_SYMBOL(copy_highpage); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 3eb2825d08cffc7335fd64e643fbed105541a424..596f46dabe4ef2f16f5d10b05e5f9983878961bb 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -943,6 +943,8 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, void tag_clear_highpage(struct page *page) { + /* Newly allocated page, shouldn't have been tagged yet */ + WARN_ON_ONCE(!try_page_mte_tagging(page)); mte_zero_clear_page_tags(page_address(page)); - set_bit(PG_mte_tagged, &page->flags); + set_page_mte_tagged(page); } diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index bed803d8e15856b56468fcbc6b271f82b4466261..cd508ba80ab1ba889fdc29647297c6707418e531 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -24,7 +24,7 @@ int mte_save_tags(struct page *page) { void *tag_storage, *ret; - if (!test_bit(PG_mte_tagged, &page->flags)) + if (!page_mte_tagged(page)) return 0; tag_storage = mte_allocate_tag_storage(); @@ -46,21 +46,17 @@ int mte_save_tags(struct page *page) return 0; } -bool mte_restore_tags(swp_entry_t entry, struct page *page) +void mte_restore_tags(swp_entry_t entry, struct page *page) { void *tags = xa_load(&mte_pages, entry.val); if (!tags) - return false; + return; - /* - * Test PG_mte_tagged again in case it was racing with another - * set_pte_at(). - */ - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) + if (try_page_mte_tagging(page)) { mte_restore_page_tags(page_address(page), tags); - - return true; + set_page_mte_tagged(page); + } } void mte_invalidate_tags(int type, pgoff_t offset) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index c6e06cdc738f0bb0696c8451a64ec8c64f7835d2..d7e4a24e8644cdecb03d96228871e8e294a2004e 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -63,6 +63,7 @@ config IA64 select NUMA if !FLATMEM select PCI_MSI_ARCH_FALLBACKS if PCI_MSI select ZONE_DMA32 + select FUNCTION_ALIGNMENT_32B default y help The Itanium Processor Family is Intel's 64-bit successor to diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile index 56c4bb276b6eda0e19c91fca6b2fd216be0e7467..d553ab7022fe4088459996580bbc0f41de05f2bd 100644 --- a/arch/ia64/Makefile +++ b/arch/ia64/Makefile @@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp EXTRA := cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ - -falign-functions=32 -frename-registers -fno-optimize-sibling-calls + -frename-registers -fno-optimize-sibling-calls KBUILD_CFLAGS_KERNEL := -mconstant-gp GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index e9744b41a226ca9a13b89f524d24f057dbfea9d0..4939f57b6f6a2256ede3bba3e9af0789eed06e9e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -598,7 +598,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu, write_ok = true; } else { /* Call KVM generic code to do the slow-path check */ - pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, writing, &write_ok, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index 5d5e12f3bf864a89ad9985788a4bb26b78a87745..9d3743ca16d53e6baf9083bee97cd58101acd695 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -846,7 +846,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, unsigned long pfn; /* Call KVM generic code to do the slow-path check */ - pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL, + pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL, writing, upgrade_p, NULL); if (is_error_noslot_pfn(pfn)) return -EFAULT; diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h deleted file mode 100644 index e6463f866abc24bdc1a1f1e04e3c0b6333fbdedc..0000000000000000000000000000000000000000 --- a/arch/powerpc/kvm/irq.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __IRQ_H -#define __IRQ_H - -#include - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - int ret = 0; - -#ifdef CONFIG_KVM_MPIC - ret = ret || (kvm->arch.mpic != NULL); -#endif -#ifdef CONFIG_KVM_XICS - ret = ret || (kvm->arch.xics != NULL); - ret = ret || (kvm->arch.xive != NULL); -#endif - smp_rmb(); - return ret; -} - -#endif diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index b850b0efa201a35bc49b4c24bffc56581c8a34d4..04494a4fb37a0ad4026dde26ec7b11e285137f1f 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -36,7 +36,6 @@ #include #include "timing.h" -#include "irq.h" #include "../mm/mmu_decl.h" #define CREATE_TRACE_POINTS @@ -2165,10 +2164,25 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) return 0; } +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm->arch.mpic != NULL); +#endif +#ifdef CONFIG_KVM_XICS + ret = ret || (kvm->arch.xics != NULL); + ret = ret || (kvm->arch.xive != NULL); +#endif + smp_rmb(); + return ret; +} + int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, bool line_status) { - if (!irqchip_in_kernel(kvm)) + if (!kvm_arch_irqchip_in_kernel(kvm)) return -ENXIO; irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 593cf09264d80a6afbe3704b176d44ff0f024929..e2b656043abf31128571abb8329327cc1044e96f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -25,6 +25,7 @@ config RISCV select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_MMIOWB + select ARCH_HAS_PMEM_API select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP if MMU select ARCH_HAS_SET_MEMORY if MMU @@ -72,6 +73,8 @@ config RISCV select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO select HARDIRQS_SW_RESEND select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP + select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL select HAVE_ARCH_KASAN if MMU && 64BIT @@ -99,6 +102,7 @@ config RISCV select HAVE_KPROBES if !XIP_KERNEL select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL select HAVE_KRETPROBES if !XIP_KERNEL + select HAVE_RETHOOK if !XIP_KERNEL select HAVE_MOVE_PMD select HAVE_MOVE_PUD select HAVE_PCI @@ -123,12 +127,18 @@ config RISCV select PCI_MSI if PCI select RISCV_INTC select RISCV_TIMER if RISCV_SBI + select SIFIVE_PLIC select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK select TRACE_IRQFLAGS_SUPPORT select UACCESS_MEMCPY if !MMU select ZONE_DMA32 if 64BIT + select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) + select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE + select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER if !XIP_KERNEL config ARCH_MMAP_RND_BITS_MIN default 18 if 64BIT @@ -274,11 +284,6 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 - select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8) - select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if !XIP_KERNEL select SWIOTLB if MMU endchoice @@ -502,7 +507,7 @@ config KEXEC_FILE select KEXEC_CORE select KEXEC_ELF select HAVE_IMA_KEXEC if IMA - depends on 64BIT + depends on 64BIT && MMU help This is new version of kexec system call. This system call is file based and takes file descriptors as system call argument @@ -691,6 +696,8 @@ menu "CPU Power Management" source "drivers/cpuidle/Kconfig" +source "drivers/cpufreq/Kconfig" + endmenu # "CPU Power Management" source "arch/riscv/kvm/Kconfig" diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index f3623df23b5fcd902339552e303c257a4dd6efa4..69621ae6d647aa369cee22c5751d3a55ec93e0b6 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -66,4 +66,17 @@ config ERRATA_THEAD_CMO If you don't know what to do here, say "Y". +config ERRATA_THEAD_PMU + bool "Apply T-Head PMU errata" + depends on ERRATA_THEAD && RISCV_PMU_SBI + default y + help + The T-Head C9xx cores implement a PMU overflow extension very + similar to the core SSCOFPMF extension. + + This will apply the overflow errata to handle the non-standard + behaviour via the regular SBI PMU driver and interface. + + If you don't know what to do here, say "Y". + endmenu # "CPU errata selection" diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 75fb0390d6bd1fbcf4f2c1e77a81adae469eaa18..4b6deb2715f1c43f7b3b0e52329c4fa82b4ab5ae 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -3,7 +3,6 @@ menu "SoC selection" config SOC_MICROCHIP_POLARFIRE bool "Microchip PolarFire SoCs" select MCHP_CLK_MPFS - select SIFIVE_PLIC help This enables support for Microchip PolarFire SoC platforms. @@ -18,7 +17,6 @@ config SOC_SIFIVE select SERIAL_SIFIVE_CONSOLE if TTY select CLK_SIFIVE select CLK_SIFIVE_PRCI - select SIFIVE_PLIC select ERRATA_SIFIVE if !XIP_KERNEL help This enables support for SiFive SoC platform hardware. @@ -27,7 +25,6 @@ config SOC_STARFIVE bool "StarFive SoCs" select PINCTRL select RESET_CONTROLLER - select SIFIVE_PLIC help This enables support for StarFive SoC platform hardware. @@ -39,7 +36,6 @@ config SOC_VIRT select POWER_RESET_SYSCON_POWEROFF select GOLDFISH select RTC_DRV_GOLDFISH if RTC_CLASS - select SIFIVE_PLIC select PM_GENERIC_DOMAINS if PM select PM_GENERIC_DOMAINS_OF if PM && OF select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI @@ -52,7 +48,6 @@ config SOC_CANAAN select CLINT_TIMER if RISCV_M_MODE select SERIAL_SIFIVE if TTY select SERIAL_SIFIVE_CONSOLE if TTY - select SIFIVE_PLIC select ARCH_HAS_RESET_CONTROLLER select PINCTRL select COMMON_CLK diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index d1a49adcb1d76283233785ec3bed58e2d1278e47..c72de7232abb2bf64a2bf279bb9359b2753c2667 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -56,6 +56,9 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) +$(obj)/Image.zst: $(obj)/Image FORCE + $(call if_changed,zstd) + $(obj)/loader.bin: $(obj)/loader FORCE $(call if_changed,objcopy) diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index f7f32448f160c788911acdb8b9b91a7c9b1133b4..128dcf4c0814bb42fad778f13750073a824c35e8 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -39,6 +39,7 @@ CONFIG_KVM=m CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y +CONFIG_SPARSEMEM_MANUAL=y CONFIG_BLK_DEV_THROTTLING=y CONFIG_NET=y CONFIG_PACKET=y @@ -123,6 +124,7 @@ CONFIG_MICROSEMI_PHY=y CONFIG_INPUT_MOUSEDEV=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_DW=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_SH_SCI=y CONFIG_VIRTIO_CONSOLE=y @@ -162,6 +164,7 @@ CONFIG_RPMSG_CHAR=y CONFIG_RPMSG_CTRL=y CONFIG_RPMSG_VIRTIO=y CONFIG_ARCH_R9A07G043=y +CONFIG_LIBNVDIMM=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_EXT4_FS_SECURITY=y diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index 21546937db39bf5fb968af60a7ee5b096242925b..fac5742d1c1e6f8e34771291b783ed58c7710251 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -47,6 +47,22 @@ static bool errata_probe_cmo(unsigned int stage, return true; } +static bool errata_probe_pmu(unsigned int stage, + unsigned long arch_id, unsigned long impid) +{ + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PMU)) + return false; + + /* target-c9xx cores report arch_id and impid as 0 */ + if (arch_id != 0 || impid != 0) + return false; + + if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) + return false; + + return true; +} + static u32 thead_errata_probe(unsigned int stage, unsigned long archid, unsigned long impid) { @@ -58,6 +74,9 @@ static u32 thead_errata_probe(unsigned int stage, if (errata_probe_cmo(stage, archid, impid)) cpu_req_errata |= BIT(ERRATA_THEAD_CMO); + if (errata_probe_pmu(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_PMU); + return cpu_req_errata; } diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index ec2f3f1b836f35171616030000209c45660d5f07..7226e2462584b65a78f7601c47e744ed7d6a54fc 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -33,7 +33,7 @@ .endif .endm -.macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable +.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable 886 : .option push .option norvc @@ -44,30 +44,14 @@ ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c .endm -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) - -.macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ - new_c_2, vendor_id_2, errata_id_2, enable_2 -886 : - .option push - .option norvc - .option norelax - \old_c - .option pop -887 : - ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1 +.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + new_c_2, vendor_id_2, errata_id_2, enable_2 + ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1 ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 .endm -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, \ - IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ - IS_ENABLED(CONFIG_k_2) +#define __ALTERNATIVE_CFG(...) ALTERNATIVE_CFG __VA_ARGS__ +#define __ALTERNATIVE_CFG_2(...) ALTERNATIVE_CFG_2 __VA_ARGS__ #else /* !__ASSEMBLY__ */ @@ -109,63 +93,44 @@ "887 :\n" \ ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) - -#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - enable_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - enable_2) \ - "886 :\n" \ - ".option push\n" \ - ".option norvc\n" \ - ".option norelax\n" \ - old_c "\n" \ - ".option pop\n" \ - "887 :\n" \ - ALT_NEW_CONTENT(vendor_id_1, errata_id_1, enable_1, new_c_1) \ +#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + new_c_2, vendor_id_2, errata_id_2, enable_2) \ + __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \ ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2) -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - IS_ENABLED(CONFIG_k_1), \ - new_c_2, vendor_id_2, errata_id_2, \ - IS_ENABLED(CONFIG_k_2)) - #endif /* __ASSEMBLY__ */ +#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ + __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) + +#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \ + new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2)) + #else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLY__ -.macro __ALTERNATIVE_CFG old_c +.macro ALTERNATIVE_CFG old_c \old_c .endm -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ - __ALTERNATIVE_CFG old_c +#define _ALTERNATIVE_CFG(old_c, ...) \ + ALTERNATIVE_CFG old_c -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG old_c +#define _ALTERNATIVE_CFG_2(old_c, ...) \ + ALTERNATIVE_CFG old_c #else /* !__ASSEMBLY__ */ -#define __ALTERNATIVE_CFG(old_c) \ +#define __ALTERNATIVE_CFG(old_c) \ old_c "\n" -#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ +#define _ALTERNATIVE_CFG(old_c, ...) \ __ALTERNATIVE_CFG(old_c) -#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, \ - CONFIG_k_1, \ - new_c_2, vendor_id_2, errata_id_2, \ - CONFIG_k_2) \ - __ALTERNATIVE_CFG(old_c) +#define _ALTERNATIVE_CFG_2(old_c, ...) \ + __ALTERNATIVE_CFG(old_c) #endif /* __ASSEMBLY__ */ #endif /* CONFIG_RISCV_ALTERNATIVE */ @@ -193,13 +158,9 @@ * on the following sample code and then replace ALTERNATIVE() with * ALTERNATIVE_2() to append its customized content. */ -#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, \ - errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, \ - errata_id_2, CONFIG_k_2) \ - _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, \ - errata_id_1, CONFIG_k_1, \ - new_content_2, vendor_id_2, \ - errata_id_2, CONFIG_k_2) +#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) #endif diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index f6fbe7042f1c847b5bae95f45b1e878617011a71..03e3b95ae6da5db11091827afbefc439862c93c6 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -17,6 +17,13 @@ static inline void local_flush_icache_all(void) static inline void flush_dcache_page(struct page *page) { + /* + * HugeTLB pages are always fully mapped and only head page will be + * set PG_dcache_clean (see comments in flush_icache_pte()). + */ + if (PageHuge(page)) + page = compound_head(page); + if (test_bit(PG_dcache_clean, &page->flags)) clear_bit(PG_dcache_clean, &page->flags); } diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 19a771085781a69da552b1daaa31b6b325a17a9a..4180312d2a70107cd532c1eb2a3bf7ed96f56b37 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -6,6 +6,7 @@ #define ASM_ERRATA_LIST_H #include +#include #include #ifdef CONFIG_ERRATA_SIFIVE @@ -17,7 +18,8 @@ #ifdef CONFIG_ERRATA_THEAD #define ERRATA_THEAD_PBMT 0 #define ERRATA_THEAD_CMO 1 -#define ERRATA_THEAD_NUMBER 2 +#define ERRATA_THEAD_PMU 2 +#define ERRATA_THEAD_NUMBER 3 #endif #define CPUFEATURE_SVPBMT 0 @@ -142,6 +144,18 @@ asm volatile(ALTERNATIVE_2( \ "r"((unsigned long)(_start) + (_size)) \ : "a0") +#define THEAD_C9XX_RV_IRQ_PMU 17 +#define THEAD_C9XX_CSR_SCOUNTEROF 0x5c5 + +#define ALT_SBI_PMU_OVERFLOW(__ovl) \ +asm volatile(ALTERNATIVE( \ + "csrr %0, " __stringify(CSR_SSCOUNTOVF), \ + "csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF), \ + THEAD_VENDOR_ID, ERRATA_THEAD_PMU, \ + CONFIG_ERRATA_THEAD_PMU) \ + : "=r" (__ovl) : \ + : "memory") + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h index a5c2ca1d1cd8ba8a7a958318aa503836ead36ebd..ec19d6afc89653b59d883ba414dbc7ecff6e8397 100644 --- a/arch/riscv/include/asm/hugetlb.h +++ b/arch/riscv/include/asm/hugetlb.h @@ -5,4 +5,10 @@ #include #include +static inline void arch_clear_hugepage_flags(struct page *page) +{ + clear_bit(PG_dcache_clean, &page->flags); +} +#define arch_clear_hugepage_flags arch_clear_hugepage_flags + #endif /* _ASM_RISCV_HUGETLB_H */ diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index b2252529007301767f4dac92f4bc93d128b020b5..86328e3acb02ee3b5752642039c2cdd2806989a1 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -59,8 +59,9 @@ enum riscv_isa_ext_id { RISCV_ISA_EXT_ZIHINTPAUSE, RISCV_ISA_EXT_SSTC, RISCV_ISA_EXT_SVINVAL, - RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX, + RISCV_ISA_EXT_ID_MAX }; +static_assert(RISCV_ISA_EXT_ID_MAX <= RISCV_ISA_EXT_MAX); /* * This enum represents the logical ID for each RISC-V ISA extension static diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h index 92080a227937283b2d094c2d204b979928d9e16d..42497d487a174642bee085b3a950ee6211396fd3 100644 --- a/arch/riscv/include/asm/io.h +++ b/arch/riscv/include/asm/io.h @@ -135,4 +135,9 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw()) #include +#ifdef CONFIG_MMU +#define arch_memremap_wb(addr, size) \ + ((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL)) +#endif + #endif /* _ASM_RISCV_IO_H */ diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index eee260e8ab308e8a033995082e1119df4739eeb3..2b56769cb530cbcc164206a1fe0361a6a9fa013f 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -39,6 +39,7 @@ crash_setup_regs(struct pt_regs *newregs, #define ARCH_HAS_KIMAGE_ARCH struct kimage_arch { + void *fdt; /* For CONFIG_KEXEC_FILE */ unsigned long fdt_addr; }; @@ -62,6 +63,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, const Elf_Shdr *relsec, const Elf_Shdr *symtab); #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add + +struct kimage; +int arch_kimage_file_post_load_cleanup(struct kimage *image); +#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup #endif #endif diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h index 217ef89f22b9f81f1f4e6de5ad12e16dcbcd9a80..e7882ccb0fd46a67da4e9e1d287066b121c5f188 100644 --- a/arch/riscv/include/asm/kprobes.h +++ b/arch/riscv/include/asm/kprobes.h @@ -40,8 +40,6 @@ void arch_remove_kprobe(struct kprobe *p); int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr); bool kprobe_breakpoint_handler(struct pt_regs *regs); bool kprobe_single_step_handler(struct pt_regs *regs); -void __kretprobe_trampoline(void); -void __kprobes *trampoline_probe_handler(struct pt_regs *regs); #endif /* CONFIG_KPROBES */ #endif /* _ASM_RISCV_KPROBES_H */ diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h index 0099dc1161683ddd1e3c45460309e331b7e6b0a7..5ff1f19fd45c29b4fc7d2c8b44ac4984caf1e75c 100644 --- a/arch/riscv/include/asm/mmu.h +++ b/arch/riscv/include/asm/mmu.h @@ -19,6 +19,8 @@ typedef struct { #ifdef CONFIG_SMP /* A local icache flush is needed before user execution can resume. */ cpumask_t icache_stale_mask; + /* A local tlb flush is needed before user execution can resume. */ + cpumask_t tlb_stale_mask; #endif } mm_context_t; diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index ac70b0fd9a9a35c49a2513840e5d9306e41dced6..9f432c1b528990e8492b35ef5dda19c482a0037d 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -123,20 +123,20 @@ extern phys_addr_t phys_ram_base; ((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE)) #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset)) -#define kernel_mapping_pa_to_va(y) ({ \ - unsigned long _y = y; \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) : \ - (void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ +#define kernel_mapping_pa_to_va(y) ({ \ + unsigned long _y = (unsigned long)(y); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ + (void *)(_y + kernel_map.va_kernel_xip_pa_offset) : \ + (void *)(_y + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ }) #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset) #define kernel_mapping_va_to_pa(y) ({ \ - unsigned long _y = y; \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ - ((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) : \ - ((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ + unsigned long _y = (unsigned long)(y); \ + (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ + (_y - kernel_map.va_kernel_xip_pa_offset) : \ + (_y - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ }) #define __va_to_pa_nodebug(x) ({ \ diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index dc42375c2357136356df1ca7f42480eeea98a3dc..42a042c0e13ed6ddac188dc8f9940a4008feb6c7 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -25,7 +25,11 @@ extern bool pgtable_l5_enabled; #define PGDIR_MASK (~(PGDIR_SIZE - 1)) /* p4d is folded into pgd in case of 4-level page table */ -#define P4D_SHIFT 39 +#define P4D_SHIFT_L3 30 +#define P4D_SHIFT_L4 39 +#define P4D_SHIFT_L5 39 +#define P4D_SHIFT (pgtable_l5_enabled ? P4D_SHIFT_L5 : \ + (pgtable_l4_enabled ? P4D_SHIFT_L4 : P4D_SHIFT_L3)) #define P4D_SIZE (_AC(1, UL) << P4D_SHIFT) #define P4D_MASK (~(P4D_SIZE - 1)) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 7ee3ac315c7c55e2bc84d636d40376c46de08eee..4eba9a98d0e3d6444245d5bfbeb010225021708b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -415,9 +415,12 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, * Relying on flush_tlb_fix_spurious_fault would suffice, but * the extra traps reduce performance. So, eagerly SFENCE.VMA. */ - local_flush_tlb_page(address); + flush_tlb_page(vma, address); } +#define __HAVE_ARCH_UPDATE_MMU_TLB +#define update_mmu_tlb update_mmu_cache + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 2a0ef738695ed0847aa02c080489aa2f8f8981a6..4ca7fbacff42494bf3e222ce5a378b507d152383 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -327,4 +327,9 @@ int sbi_err_map_linux_errno(int err); static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; } static inline void sbi_init(void) {} #endif /* CONFIG_RISCV_SBI */ + +unsigned long riscv_cached_mvendorid(unsigned int cpu_id); +unsigned long riscv_cached_marchid(unsigned int cpu_id); +unsigned long riscv_cached_mimpid(unsigned int cpu_id); + #endif /* _ASM_RISCV_SBI_H */ diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 801019381dea3fec53f50fe443fb350c3ab92c43..907b9efd39a87dd3853c1f8f21f4baa2fdd1125c 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr) { ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } + +static inline void local_flush_tlb_all_asid(unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma x0, %0" + : + : "r" (asid) + : "memory"); +} + +static inline void local_flush_tlb_page_asid(unsigned long addr, + unsigned long asid) +{ + __asm__ __volatile__ ("sfence.vma %0, %1" + : + : "r" (addr), "r" (asid) + : "memory"); +} + #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) #define local_flush_tlb_page(addr) do { } while (0) diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h index af981426fe0ff7b5c43b63d54670eac0c7f1cf7f..a7644f46d0e56e5f60ec38820b576ae22a4e7b68 100644 --- a/arch/riscv/include/asm/vdso.h +++ b/arch/riscv/include/asm/vdso.h @@ -10,7 +10,7 @@ /* * All systems with an MMU have a VDSO, but systems without an MMU don't - * support shared libraries and therefor don't have one. + * support shared libraries and therefore don't have one. */ #ifdef CONFIG_MMU diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index ff9abc00d1394f58933cb4f90d9b773c795b0f0d..48da5371f1e9acf96a5637df7baadf2d4a67ad20 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -1,4 +1,22 @@ #ifndef _ASM_RISCV_VMALLOC_H #define _ASM_RISCV_VMALLOC_H +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP + +#define IOREMAP_MAX_ORDER (PUD_SHIFT) + +#define arch_vmap_pud_supported arch_vmap_pud_supported +static inline bool arch_vmap_pud_supported(pgprot_t prot) +{ + return true; +} + +#define arch_vmap_pmd_supported arch_vmap_pmd_supported +static inline bool arch_vmap_pmd_supported(pgprot_t prot) +{ + return true; +} + +#endif + #endif /* _ASM_RISCV_VMALLOC_H */ diff --git a/arch/riscv/include/uapi/asm/ucontext.h b/arch/riscv/include/uapi/asm/ucontext.h index 44eb993950e5e32fe26267fe99c6071e5ddf1f2d..516bd0bb0da5c1b47f1acb20c937f461526e4db7 100644 --- a/arch/riscv/include/uapi/asm/ucontext.h +++ b/arch/riscv/include/uapi/asm/ucontext.h @@ -15,19 +15,23 @@ struct ucontext { struct ucontext *uc_link; stack_t uc_stack; sigset_t uc_sigmask; - /* There's some padding here to allow sigset_t to be expanded in the + /* + * There's some padding here to allow sigset_t to be expanded in the * future. Though this is unlikely, other architectures put uc_sigmask * at the end of this structure and explicitly state it can be - * expanded, so we didn't want to box ourselves in here. */ + * expanded, so we didn't want to box ourselves in here. + */ __u8 __unused[1024 / 8 - sizeof(sigset_t)]; - /* We can't put uc_sigmask at the end of this structure because we need + /* + * We can't put uc_sigmask at the end of this structure because we need * to be able to expand sigcontext in the future. For example, the * vector ISA extension will almost certainly add ISA state. We want * to ensure all user-visible ISA state can be saved and restored via a * ucontext, so we're putting this at the end in order to allow for * infinite extensibility. Since we know this will be extended and we * assume sigset_t won't be extended an extreme amount, we're - * prioritizing this. */ + * prioritizing this. + */ struct sigcontext uc_mcontext; }; diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index db6e4b1294ba3761bed0a3e1a249dd2ac7e2d25e..4cf303a779ab906c313c9bc91f695bf2911c5687 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -81,6 +81,7 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 852ecccd8920f7e608616bcaff07694441212ad5..1b9a5a66e55ab8eb0bbd7c228565674bd8b91569 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -70,8 +70,6 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } -#ifdef CONFIG_PROC_FS - struct riscv_cpuinfo { unsigned long mvendorid; unsigned long marchid; @@ -79,6 +77,30 @@ struct riscv_cpuinfo { }; static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +unsigned long riscv_cached_mvendorid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->mvendorid; +} +EXPORT_SYMBOL(riscv_cached_mvendorid); + +unsigned long riscv_cached_marchid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->marchid; +} +EXPORT_SYMBOL(riscv_cached_marchid); + +unsigned long riscv_cached_mimpid(unsigned int cpu_id) +{ + struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id); + + return ci->mimpid; +} +EXPORT_SYMBOL(riscv_cached_mimpid); + static int riscv_cpuinfo_starting(unsigned int cpu) { struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo); @@ -113,7 +135,9 @@ static int __init riscv_cpuinfo_init(void) return 0; } -device_initcall(riscv_cpuinfo_init); +arch_initcall(riscv_cpuinfo_init); + +#ifdef CONFIG_PROC_FS #define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \ { \ diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 694267d1fe8149a6a29aab095e441aafaa07b0ec..93e45560af307c21533946fe1e2b328a947d46f3 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -68,21 +69,38 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit) } EXPORT_SYMBOL_GPL(__riscv_isa_extension_available); +static bool riscv_isa_extension_check(int id) +{ + switch (id) { + case RISCV_ISA_EXT_ZICBOM: + if (!riscv_cbom_block_size) { + pr_err("Zicbom detected in ISA string, but no cbom-block-size found\n"); + return false; + } else if (!is_power_of_2(riscv_cbom_block_size)) { + pr_err("cbom-block-size present, but is not a power-of-2\n"); + return false; + } + return true; + } + + return true; +} + void __init riscv_fill_hwcap(void) { struct device_node *node; const char *isa; char print_str[NUM_ALPHA_EXTS + 1]; int i, j, rc; - static unsigned long isa2hwcap[256] = {0}; + unsigned long isa2hwcap[26] = {0}; unsigned long hartid; - isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I; - isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M; - isa2hwcap['a'] = isa2hwcap['A'] = COMPAT_HWCAP_ISA_A; - isa2hwcap['f'] = isa2hwcap['F'] = COMPAT_HWCAP_ISA_F; - isa2hwcap['d'] = isa2hwcap['D'] = COMPAT_HWCAP_ISA_D; - isa2hwcap['c'] = isa2hwcap['C'] = COMPAT_HWCAP_ISA_C; + isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I; + isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M; + isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A; + isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F; + isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D; + isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C; elf_hwcap = 0; @@ -189,15 +207,20 @@ void __init riscv_fill_hwcap(void) #define SET_ISA_EXT_MAP(name, bit) \ do { \ if ((ext_end - ext == sizeof(name) - 1) && \ - !memcmp(ext, name, sizeof(name) - 1)) \ + !memcmp(ext, name, sizeof(name) - 1) && \ + riscv_isa_extension_check(bit)) \ set_bit(bit, this_isa); \ } while (false) \ if (unlikely(ext_err)) continue; if (!ext_long) { - this_hwcap |= isa2hwcap[(unsigned char)(*ext)]; - set_bit(*ext - 'a', this_isa); + int nr = *ext - 'a'; + + if (riscv_isa_extension_check(nr)) { + this_hwcap |= isa2hwcap[nr]; + set_bit(nr, this_isa); + } } else { SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c new file mode 100644 index 0000000000000000000000000000000000000000..b351a3c013555650b6ad4f75e0b7e2de28c8ba81 --- /dev/null +++ b/arch/riscv/kernel/crash_core.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +void arch_crash_save_vmcoreinfo(void) +{ + VMCOREINFO_NUMBER(VA_BITS); + VMCOREINFO_NUMBER(phys_ram_base); + + vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); +#ifdef CONFIG_64BIT + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); +#endif + vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +} diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 0cb94992c15b32a852716ca877d95b5f14f63c6f..5372b708fae21a072f72434625d27d792b83abc7 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -21,6 +21,18 @@ #include #include +int arch_kimage_file_post_load_cleanup(struct kimage *image) +{ + kvfree(image->arch.fdt); + image->arch.fdt = NULL; + + vfree(image->elf_headers); + image->elf_headers = NULL; + image->elf_headers_sz = 0; + + return kexec_image_post_load_cleanup_default(image); +} + static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr, struct kexec_elf_info *elf_info, unsigned long old_pbase, unsigned long new_pbase) @@ -298,6 +310,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, pr_err("Error add DTB kbuf ret=%d\n", ret); goto out_free_fdt; } + /* Cache the fdt buffer address for memory cleanup */ + image->arch.fdt = fdt; pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem); goto out; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 186abd146eaffca06c6c3db10902c42ffa769dbc..99d38fdf8b18f2f18fe3bae2dce2d7a3e5b11b5a 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -248,7 +248,7 @@ ret_from_syscall_rejected: andi t0, t0, _TIF_SYSCALL_WORK bnez t0, handle_syscall_trace_exit -ret_from_exception: +SYM_CODE_START_NOALIGN(ret_from_exception) REG_L s0, PT_STATUS(sp) csrc CSR_STATUS, SR_IE #ifdef CONFIG_TRACE_IRQFLAGS @@ -262,13 +262,13 @@ ret_from_exception: andi s0, s0, SR_SPP #endif bnez s0, resume_kernel +SYM_CODE_END(ret_from_exception) -resume_userspace: /* Interrupts must be disabled here so flags are checked atomically */ REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */ andi s1, s0, _TIF_WORK_MASK - bnez s1, work_pending - + bnez s1, resume_userspace_slow +resume_userspace: #ifdef CONFIG_CONTEXT_TRACKING_USER call user_enter_callable #endif @@ -368,19 +368,12 @@ resume_kernel: j restore_all #endif -work_pending: +resume_userspace_slow: /* Enter slow path for supplementary processing */ - la ra, ret_from_exception - andi s1, s0, _TIF_NEED_RESCHED - bnez s1, work_resched -work_notifysig: - /* Handle pending signals and notify-resume requests */ - csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */ move a0, sp /* pt_regs */ move a1, s0 /* current_thread_info->flags */ - tail do_notify_resume -work_resched: - tail schedule + call do_work_pending + j resume_userspace /* Slow paths for ptrace. */ handle_syscall_trace_enter: diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 6d462681c9c0238e012daa973a3441a849ed34a9..30102aadc4d73ce463dce7b88bc7e73c124a109c 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -15,8 +15,8 @@ .macro SAVE_ABI_STATE addi sp, sp, -16 - sd s0, 0(sp) - sd ra, 8(sp) + REG_S s0, 0*SZREG(sp) + REG_S ra, 1*SZREG(sp) addi s0, sp, 16 .endm @@ -25,24 +25,26 @@ * register if a0 was not saved. */ .macro SAVE_RET_ABI_STATE - addi sp, sp, -32 - sd s0, 16(sp) - sd ra, 24(sp) - sd a0, 8(sp) - addi s0, sp, 32 + addi sp, sp, -4*SZREG + REG_S s0, 2*SZREG(sp) + REG_S ra, 3*SZREG(sp) + REG_S a0, 1*SZREG(sp) + REG_S a1, 0*SZREG(sp) + addi s0, sp, 4*SZREG .endm .macro RESTORE_ABI_STATE - ld ra, 8(sp) - ld s0, 0(sp) + REG_L ra, 1*SZREG(sp) + REG_L s0, 0*SZREG(sp) addi sp, sp, 16 .endm .macro RESTORE_RET_ABI_STATE - ld ra, 24(sp) - ld s0, 16(sp) - ld a0, 8(sp) - addi sp, sp, 32 + REG_L ra, 3*SZREG(sp) + REG_L s0, 2*SZREG(sp) + REG_L a0, 1*SZREG(sp) + REG_L a1, 0*SZREG(sp) + addi sp, sp, 4*SZREG .endm ENTRY(ftrace_stub) @@ -71,9 +73,9 @@ ENTRY(return_to_handler) mv a0, t6 #endif call ftrace_return_to_handler - mv a1, a0 + mv a2, a0 RESTORE_RET_ABI_STATE - jalr a1 + jalr a2 ENDPROC(return_to_handler) #endif @@ -82,16 +84,16 @@ ENTRY(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return - ld t1, 0(t0) + REG_L t1, 0(t0) bne t1, t4, do_ftrace_graph_caller la t3, ftrace_graph_entry - ld t2, 0(t3) + REG_L t2, 0(t3) la t6, ftrace_graph_entry_stub bne t2, t6, do_ftrace_graph_caller #endif la t3, ftrace_trace_function - ld t5, 0(t3) + REG_L t5, 0(t3) bne t5, t4, do_trace ret @@ -101,10 +103,10 @@ ENTRY(MCOUNT_NAME) * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller) */ do_ftrace_graph_caller: - addi a0, s0, -8 + addi a0, s0, -SZREG mv a1, ra #ifdef HAVE_FUNCTION_GRAPH_FP_TEST - ld a2, -16(s0) + REG_L a2, -2*SZREG(s0) #endif SAVE_ABI_STATE call prepare_ftrace_return @@ -117,7 +119,7 @@ do_ftrace_graph_caller: * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller) */ do_trace: - ld a1, -8(s0) + REG_L a1, -SZREG(s0) mv a0, ra SAVE_ABI_STATE diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile index 7f0840dcc31bc78eeecd6c3ff45a5a645f2b2283..c40139e9ca4781e414946bd9e58648f15c89d25a 100644 --- a/arch/riscv/kernel/probes/Makefile +++ b/arch/riscv/kernel/probes/Makefile @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_KPROBES) += kprobes.o decode-insn.o simulate-insn.o -obj-$(CONFIG_KPROBES) += kprobes_trampoline.o +obj-$(CONFIG_RETHOOK) += rethook.o rethook_trampoline.o obj-$(CONFIG_KPROBES_ON_FTRACE) += ftrace.o obj-$(CONFIG_UPROBES) += uprobes.o decode-insn.o simulate-insn.o CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE) diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index e6e950b7cf32742ed5902b6cd74f6249e0bde94d..f21592d203062ac2f9db083364f2e7d526e0ba8e 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -345,19 +345,6 @@ int __init arch_populate_kprobe_blacklist(void) return ret; } -void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs) -{ - return (void *)kretprobe_trampoline_handler(regs, NULL); -} - -void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, - struct pt_regs *regs) -{ - ri->ret_addr = (kprobe_opcode_t *)regs->ra; - ri->fp = NULL; - regs->ra = (unsigned long) &__kretprobe_trampoline; -} - int __kprobes arch_trampoline_kprobe(struct kprobe *p) { return 0; diff --git a/arch/riscv/kernel/probes/rethook.c b/arch/riscv/kernel/probes/rethook.c new file mode 100644 index 0000000000000000000000000000000000000000..5c27c1f509894d69825d71196182837fb79ca000 --- /dev/null +++ b/arch/riscv/kernel/probes/rethook.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Generic return hook for riscv. + */ + +#include +#include +#include "rethook.h" + +/* This is called from arch_rethook_trampoline() */ +unsigned long __used arch_rethook_trampoline_callback(struct pt_regs *regs) +{ + return rethook_trampoline_handler(regs, regs->s0); +} + +NOKPROBE_SYMBOL(arch_rethook_trampoline_callback); + +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount) +{ + rhn->ret_addr = regs->ra; + rhn->frame = regs->s0; + + /* replace return addr with trampoline */ + regs->ra = (unsigned long)arch_rethook_trampoline; +} + +NOKPROBE_SYMBOL(arch_rethook_prepare); diff --git a/arch/riscv/kernel/probes/rethook.h b/arch/riscv/kernel/probes/rethook.h new file mode 100644 index 0000000000000000000000000000000000000000..4758f7e3ce881f78d0134e058223f03580736194 --- /dev/null +++ b/arch/riscv/kernel/probes/rethook.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __RISCV_RETHOOK_H +#define __RISCV_RETHOOK_H + +unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs); +void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount); + +#endif diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S similarity index 94% rename from arch/riscv/kernel/probes/kprobes_trampoline.S rename to arch/riscv/kernel/probes/rethook_trampoline.S index 7bdb09ded39b8c3b1dc3f064c51bf65d1b72c801..21bac92a170a9bd43be973f213cb8be3329175ef 100644 --- a/arch/riscv/kernel/probes/kprobes_trampoline.S +++ b/arch/riscv/kernel/probes/rethook_trampoline.S @@ -75,13 +75,13 @@ REG_L x31, PT_T6(sp) .endm -ENTRY(__kretprobe_trampoline) +ENTRY(arch_rethook_trampoline) addi sp, sp, -(PT_SIZE_ON_STACK) save_all_base_regs move a0, sp /* pt_regs */ - call trampoline_probe_handler + call arch_rethook_trampoline_callback /* use the result as the return-address */ move ra, a0 @@ -90,4 +90,4 @@ ENTRY(__kretprobe_trampoline) addi sp, sp, PT_SIZE_ON_STACK ret -ENDPROC(__kretprobe_trampoline) +ENDPROC(arch_rethook_trampoline) diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 5c591123c440910594f0051be1bb6387d72d05d6..bfb2afa4135f89690b90da20d281252a539d8cda 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -313,19 +313,27 @@ static void do_signal(struct pt_regs *regs) } /* - * notification of userspace execution resumption - * - triggered by the _TIF_WORK_MASK flags + * Handle any pending work on the resume-to-userspace path, as indicated by + * _TIF_WORK_MASK. Entered from assembly with IRQs off. */ -asmlinkage __visible void do_notify_resume(struct pt_regs *regs, - unsigned long thread_info_flags) +asmlinkage __visible void do_work_pending(struct pt_regs *regs, + unsigned long thread_info_flags) { - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - - /* Handle pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) - do_signal(regs); - - if (thread_info_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(regs); + do { + if (thread_info_flags & _TIF_NEED_RESCHED) { + schedule(); + } else { + local_irq_enable(); + if (thread_info_flags & _TIF_UPROBE) + uprobe_notify_resume(regs); + /* Handle pending signal delivery */ + if (thread_info_flags & (_TIF_SIGPENDING | + _TIF_NOTIFY_SIGNAL)) + do_signal(regs); + if (thread_info_flags & _TIF_NOTIFY_RESUME) + resume_user_mode_work(regs); + } + local_irq_disable(); + thread_info_flags = read_thread_flags(); + } while (thread_info_flags & _TIF_WORK_MASK); } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 08d11a53f39e7a767a643b27b996b2ce36a3fb16..75c8dd64fc48e02ba56942d2d1a029ff88c061a4 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -16,6 +16,8 @@ #ifdef CONFIG_FRAME_POINTER +extern asmlinkage void ret_from_exception(void); + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(void *, unsigned long), void *arg) { @@ -58,7 +60,14 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, } else { fp = frame->fp; pc = ftrace_graph_ret_addr(current, NULL, frame->ra, - (unsigned long *)(fp - 8)); + &frame->ra); + if (pc == (unsigned long)ret_from_exception) { + if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc))) + break; + + pc = ((struct pt_regs *)sp)->epc; + fp = ((struct pt_regs *)sp)->s0; + } } } diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 7abd8e4c4df63e29ec4b88ca31900e0bce377483..549bde5c970a13e9412148c7c45aca1ef2034825 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -208,18 +208,18 @@ int is_valid_bugaddr(unsigned long pc) #endif /* CONFIG_GENERIC_BUG */ #ifdef CONFIG_VMAP_STACK +/* + * Extra stack space that allows us to provide panic messages when the kernel + * has overflowed its stack. + */ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack)__aligned(16); /* - * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used - * to get per-cpu overflow stack(get_overflow_stack). + * A temporary stack for use by handle_kernel_stack_overflow. This is used so + * we can call into C code to get the per-hart overflow stack. Usage of this + * stack must be protected by spin_shadow_stack. */ -long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)]; -asmlinkage unsigned long get_overflow_stack(void) -{ - return (unsigned long)this_cpu_ptr(overflow_stack) + - OVERFLOW_STACK_SIZE; -} +long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16); /* * A pseudo spinlock to protect the shadow stack from being used by multiple @@ -230,6 +230,12 @@ asmlinkage unsigned long get_overflow_stack(void) */ unsigned long spin_shadow_stack; +asmlinkage unsigned long get_overflow_stack(void) +{ + return (unsigned long)this_cpu_ptr(overflow_stack) + + OVERFLOW_STACK_SIZE; +} + asmlinkage void handle_bad_stack(struct pt_regs *regs) { unsigned long tsk_stk = (unsigned long)current->stack; diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index d76aabf4b94d6a8bf4ef20eab680de4d4be16ff9..2ac177c053520b5fa2f6eb3c4f2006f494c8fd3c 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -13,6 +13,8 @@ obj-y += extable.o obj-$(CONFIG_MMU) += fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o +obj-y += pgtable.o +obj-y += pmem.o ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c index 57b40a3504206411ac8e152b6a13d8e183477c43..3cc07ed45aeb3c3e68399c01f046f62856a58b61 100644 --- a/arch/riscv/mm/cacheflush.c +++ b/arch/riscv/mm/cacheflush.c @@ -83,6 +83,13 @@ void flush_icache_pte(pte_t pte) { struct page *page = pte_page(pte); + /* + * HugeTLB pages are always fully mapped, so only setting head page's + * PG_dcache_clean flag is enough. + */ + if (PageHuge(page)) + page = compound_head(page); + if (!test_and_set_bit(PG_dcache_clean, &page->flags)) flush_icache_all(); } diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c index 7acbfbd14557e600ba69b888c7389fef64f72428..80ce9caba8d225979426f01f9a636d11890f9de6 100644 --- a/arch/riscv/mm/context.c +++ b/arch/riscv/mm/context.c @@ -196,6 +196,16 @@ switch_mm_fast: if (need_flush_tlb) local_flush_tlb_all(); +#ifdef CONFIG_SMP + else { + cpumask_t *mask = &mm->context.tlb_stale_mask; + + if (cpumask_test_cpu(cpu, mask)) { + cpumask_clear_cpu(cpu, mask); + local_flush_tlb_all_asid(cntx & asid_mask); + } + } +#endif } static void set_mm_noasid(struct mm_struct *mm) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 50a1b6edd491823517fd9c31948c3a9e2d4d7caa..478d6763a01a1ebde626635a2f7703f4415649a7 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -672,10 +672,11 @@ void __init create_pgd_mapping(pgd_t *pgdp, static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) { /* Upgrade to PMD_SIZE mappings whenever possible */ - if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1))) - return PAGE_SIZE; + base &= PMD_SIZE - 1; + if (!base && size >= PMD_SIZE) + return PMD_SIZE; - return PMD_SIZE; + return PAGE_SIZE; } #ifdef CONFIG_XIP_KERNEL @@ -926,15 +927,15 @@ static void __init pt_ops_set_early(void) */ static void __init pt_ops_set_fixmap(void) { - pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap); - pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap); + pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap); + pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap); #ifndef __PAGETABLE_PMD_FOLDED - pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap); - pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap); - pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap); - pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap); - pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap); - pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap); + pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap); + pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap); + pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap); + pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap); + pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap); + pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap); #endif } @@ -1110,9 +1111,9 @@ static void __init setup_vm_final(void) if (end >= __pa(PAGE_OFFSET) + memory_limit) end = __pa(PAGE_OFFSET) + memory_limit; - map_size = best_map_size(start, end - start); for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); + map_size = best_map_size(pa, end - pa); create_pgd_mapping(swapper_pg_dir, va, pa, map_size, pgprot_from_va(va)); diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c new file mode 100644 index 0000000000000000000000000000000000000000..6645ead1a7c16d40dbe232c5358b4a484e10909f --- /dev/null +++ b/arch/riscv/mm/pgtable.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) +{ + return 0; +} + +void p4d_clear_huge(p4d_t *p4d) +{ +} + +int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot) +{ + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), prot); + + set_pud(pud, new_pud); + return 1; +} + +int pud_clear_huge(pud_t *pud) +{ + if (!pud_leaf(READ_ONCE(*pud))) + return 0; + pud_clear(pud); + return 1; +} + +int pud_free_pmd_page(pud_t *pud, unsigned long addr) +{ + pmd_t *pmd = pud_pgtable(*pud); + int i; + + pud_clear(pud); + + flush_tlb_kernel_range(addr, addr + PUD_SIZE); + + for (i = 0; i < PTRS_PER_PMD; i++) { + if (!pmd_none(pmd[i])) { + pte_t *pte = (pte_t *)pmd_page_vaddr(pmd[i]); + + pte_free_kernel(NULL, pte); + } + } + + pmd_free(NULL, pmd); + + return 1; +} + +int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot) +{ + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), prot); + + set_pmd(pmd, new_pmd); + return 1; +} + +int pmd_clear_huge(pmd_t *pmd) +{ + if (!pmd_leaf(READ_ONCE(*pmd))) + return 0; + pmd_clear(pmd); + return 1; +} + +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) +{ + pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd); + + pmd_clear(pmd); + + flush_tlb_kernel_range(addr, addr + PMD_SIZE); + pte_free_kernel(NULL, pte); + return 1; +} + +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index 19cf25a74ee29dc4fafb31fa1ef5eda17255906c..9b18bda74154e8a3b2b807cd0ae2065eb7c28612 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -22,7 +22,7 @@ EXPORT_SYMBOL(__virt_to_phys); phys_addr_t __phys_addr_symbol(unsigned long x) { unsigned long kernel_start = kernel_map.virt_addr; - unsigned long kernel_end = (unsigned long)_end; + unsigned long kernel_end = kernel_start + kernel_map.size; /* * Boundary checking aginst the kernel image mapping. diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c new file mode 100644 index 0000000000000000000000000000000000000000..089df92ae8760a4e27d3c53c0ef05e5f4035eae3 --- /dev/null +++ b/arch/riscv/mm/pmem.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2022 Ventana Micro Systems Inc. + */ + +#include +#include + +#include + +void arch_wb_cache_pmem(void *addr, size_t size) +{ + ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size); +} +EXPORT_SYMBOL_GPL(arch_wb_cache_pmem); + +void arch_invalidate_pmem(void *addr, size_t size) +{ + ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size); +} +EXPORT_SYMBOL_GPL(arch_invalidate_pmem); diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 37ed760d007c3eef9c302adda361cd4ad7d1db9d..ce7dfc81bb3fe386748557f44310aa4b1a86f3a9 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -5,23 +5,7 @@ #include #include #include - -static inline void local_flush_tlb_all_asid(unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma x0, %0" - : - : "r" (asid) - : "memory"); -} - -static inline void local_flush_tlb_page_asid(unsigned long addr, - unsigned long asid) -{ - __asm__ __volatile__ ("sfence.vma %0, %1" - : - : "r" (addr), "r" (asid) - : "memory"); -} +#include void flush_tlb_all(void) { @@ -31,6 +15,7 @@ void flush_tlb_all(void) static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, unsigned long size, unsigned long stride) { + struct cpumask *pmask = &mm->context.tlb_stale_mask; struct cpumask *cmask = mm_cpumask(mm); unsigned int cpuid; bool broadcast; @@ -44,6 +29,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start, if (static_branch_unlikely(&use_asid_allocator)) { unsigned long asid = atomic_long_read(&mm->context.id); + /* + * TLB will be immediately flushed on harts concurrently + * executing this MM context. TLB flush on other harts + * is deferred until this MM context migrates there. + */ + cpumask_setall(pmask); + cpumask_clear_cpu(cpuid, pmask); + cpumask_andnot(pmask, pmask, cmask); + if (broadcast) { sbi_remote_sfence_vma_asid(cmask, start, size, asid); } else if (size <= stride) { diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index b1e98a9ed152b91365f014c7a54b3b5cfbeff459..d67ce719d16a994e8bbfc36ea53d5ffb69e4a50b 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -142,8 +142,7 @@ struct mcck_volatile_info { CR14_EXTERNAL_DAMAGE_SUBMASK) #define SIDAD_SIZE_MASK 0xff -#define sida_origin(sie_block) \ - ((sie_block)->sidad & PAGE_MASK) +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) #define sida_size(sie_block) \ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) @@ -276,6 +275,7 @@ struct kvm_s390_sie_block { #define ECB3_AES 0x04 #define ECB3_RI 0x01 __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU __u32 scaol; /* 0x0064 */ __u8 sdf; /* 0x0068 */ __u8 epdx; /* 0x0069 */ @@ -942,6 +942,8 @@ struct kvm_s390_pv { unsigned long stor_base; void *stor_var; bool dumping; + void *set_aside; + struct list_head need_cleanup; struct mmu_notifier mmu_notifier; }; @@ -1017,7 +1019,13 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); -extern int sie64a(struct kvm_s390_sie_block *, u64 *); +int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa); + +static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa) +{ + return __sie64a(virt_to_phys(sie_block), sie_block, rsa); +} + extern char sie_exit; extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h index 08a8b96606d7eb3eaab19ebe498f6d33f91aadee..b85e13505a0f2e7a771199160b4524718ca39116 100644 --- a/arch/s390/include/asm/mem_encrypt.h +++ b/arch/s390/include/asm/mem_encrypt.h @@ -4,8 +4,8 @@ #ifndef __ASSEMBLY__ -int set_memory_encrypted(unsigned long addr, int numpages); -int set_memory_decrypted(unsigned long addr, int numpages); +int set_memory_encrypted(unsigned long vaddr, int numpages); +int set_memory_decrypted(unsigned long vaddr, int numpages); #endif /* __ASSEMBLY__ */ diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h index b23c658dce77f19e6b03a9fdb093e6fdf913f9c9..1802be5abb5dc3256de62bcb07b509f3b3afd3b3 100644 --- a/arch/s390/include/asm/stacktrace.h +++ b/arch/s390/include/asm/stacktrace.h @@ -46,6 +46,7 @@ struct stack_frame { unsigned long sie_savearea; unsigned long sie_reason; unsigned long sie_flags; + unsigned long sie_control_block_phys; }; }; unsigned long gprs[10]; diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index be3ef9dd697265dad3dbc72fc58edacf14755dc9..28a9ad57b6f16ca30cf2be582eb047425bc7827b 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -34,6 +34,7 @@ #define UVC_CMD_INIT_UV 0x000f #define UVC_CMD_CREATE_SEC_CONF 0x0100 #define UVC_CMD_DESTROY_SEC_CONF 0x0101 +#define UVC_CMD_DESTROY_SEC_CONF_FAST 0x0102 #define UVC_CMD_CREATE_SEC_CPU 0x0120 #define UVC_CMD_DESTROY_SEC_CPU 0x0121 #define UVC_CMD_CONV_TO_SEC_STOR 0x0200 @@ -81,6 +82,7 @@ enum uv_cmds_inst { BIT_UVC_CMD_UNSHARE_ALL = 20, BIT_UVC_CMD_PIN_PAGE_SHARED = 21, BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, + BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23, BIT_UVC_CMD_DUMP_INIT = 24, BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25, BIT_UVC_CMD_DUMP_CPU = 26, @@ -230,6 +232,14 @@ struct uv_cb_nodata { u64 reserved20[4]; } __packed __aligned(8); +/* Destroy Configuration Fast */ +struct uv_cb_destroy_fast { + struct uv_cb_header header; + u64 reserved08[2]; + u64 handle; + u64 reserved20[5]; +} __packed __aligned(8); + /* Set Shared Access */ struct uv_cb_share { struct uv_cb_header header; diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index d8ce965c0a97c6cbf5668ca247e936f1362bad2f..3f8e760298c2201c3bf207433d2e25ecdcca2dd8 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -62,6 +62,7 @@ int main(void) OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea); OFFSET(__SF_SIE_REASON, stack_frame, sie_reason); OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags); + OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys); DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame)); BLANK(); /* idle data offsets */ diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index e0d11f3adfccd05dfec8fef7dc2b920eef67cf62..0f423e9df09565b20c3442309e5d031ee55fdc16 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -207,18 +207,20 @@ ENDPROC(__switch_to) #if IS_ENABLED(CONFIG_KVM) /* - * sie64a calling convention: - * %r2 pointer to sie control block - * %r3 guest register save area + * __sie64a calling convention: + * %r2 pointer to sie control block phys + * %r3 pointer to sie control block virt + * %r4 guest register save area */ -ENTRY(sie64a) +ENTRY(__sie64a) stmg %r6,%r14,__SF_GPRS(%r15) # save kernel registers lg %r12,__LC_CURRENT - stg %r2,__SF_SIE_CONTROL(%r15) # save control block pointer - stg %r3,__SF_SIE_SAVEAREA(%r15) # save guest register save area + stg %r2,__SF_SIE_CONTROL_PHYS(%r15) # save sie block physical.. + stg %r3,__SF_SIE_CONTROL(%r15) # ...and virtual addresses + stg %r4,__SF_SIE_SAVEAREA(%r15) # save guest register save area xc __SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0 mvc __SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags - lmg %r0,%r13,0(%r3) # load guest gprs 0-13 + lmg %r0,%r13,0(%r4) # load guest gprs 0-13 lg %r14,__LC_GMAP # get gmap pointer ltgr %r14,%r14 jz .Lsie_gmap @@ -230,6 +232,7 @@ ENTRY(sie64a) jnz .Lsie_skip TSTMSK __LC_CPU_FLAGS,_CIF_FPU jo .Lsie_skip # exit if fp/vx regs changed + lg %r14,__SF_SIE_CONTROL_PHYS(%r15) # get sie block phys addr BPEXIT __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_entry: sie 0(%r14) @@ -240,13 +243,14 @@ ENTRY(sie64a) BPOFF BPENTER __SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST) .Lsie_skip: + lg %r14,__SF_SIE_CONTROL(%r15) # get control block pointer ni __SIE_PROG0C+3(%r14),0xfe # no longer in SIE lctlg %c1,%c1,__LC_KERNEL_ASCE # load primary asce .Lsie_done: # some program checks are suppressing. C code (e.g. do_protection_exception) # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable. -# Other instructions between sie64a and .Lsie_done should not cause program +# Other instructions between __sie64a and .Lsie_done should not cause program # interrupts. So lets use 3 nops as a landing pad for all possible rewinds. .Lrewind_pad6: nopr 7 @@ -275,8 +279,8 @@ sie_exit: EX_TABLE(.Lrewind_pad4,.Lsie_fault) EX_TABLE(.Lrewind_pad2,.Lsie_fault) EX_TABLE(sie_exit,.Lsie_fault) -ENDPROC(sie64a) -EXPORT_SYMBOL(sie64a) +ENDPROC(__sie64a) +EXPORT_SYMBOL(__sie64a) EXPORT_SYMBOL(sie_exit) #endif @@ -355,7 +359,7 @@ ENTRY(pgm_check_handler) j 3f # -> fault in user space .Lpgm_skip_asce: #if IS_ENABLED(CONFIG_KVM) - # cleanup critical section for program checks in sie64a + # cleanup critical section for program checks in __sie64a OUTSIDE %r9,.Lsie_gmap,.Lsie_done,1f SIEEXIT lghi %r10,_PIF_GUEST_FAULT diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index f9810d2a267c68b37bcb940e59d3896444ef024e..9f18a4af9c13198468602bc56f22342b5e4a373b 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -255,6 +255,13 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr, */ static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) { + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications)) + return false; if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) return false; return atomic_read(&mm->context.protected_count) > 1; diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 88112065d94116f852fdbcc9b1a8d8241a6fca6e..0ee02dae14b2bdc1294d4685825f25f042715fac 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -217,7 +217,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu) return 0; if (current->thread.per_flags & PER_FLAG_NO_TE) return 0; - itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba; + itdb = phys_to_virt(vcpu->arch.sie_block->itdba); rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb)); if (rc) return rc; @@ -409,8 +409,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) out: if (!cc) { if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)(sida_origin(vcpu->arch.sie_block)), - sctns, PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE); } else { r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); if (r) { @@ -464,7 +463,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu) static int handle_pv_spx(struct kvm_vcpu *vcpu) { - u32 pref = *(u32 *)vcpu->arch.sie_block->sidad; + u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block); kvm_s390_set_prefix(vcpu, pref); trace_kvm_s390_handle_prefix(vcpu, 1, pref); @@ -497,7 +496,7 @@ static int handle_pv_sclp(struct kvm_vcpu *vcpu) static int handle_pv_uvc(struct kvm_vcpu *vcpu) { - struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad; + struct uv_cb_share *guest_uvcb = sida_addr(vcpu->arch.sie_block); struct uv_cb_cts uvcb = { .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED, .header.len = sizeof(uvcb), diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ab569faf0df241a8bbc03a05e9aa28870c22b61c..1dae78deddf28602d32d051dc5ad956be6dd5315 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -314,11 +314,6 @@ static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa) return READ_ONCE(gisa->ipm); } -static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) -{ - clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); -} - static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h deleted file mode 100644 index 484608c71dd01185b88f9db1a11e3f523bcefebf..0000000000000000000000000000000000000000 --- a/arch/s390/kvm/irq.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * s390 irqchip routines - * - * Copyright IBM Corp. 2014 - * - * Author(s): Cornelia Huck - */ -#ifndef __KVM_IRQ_H -#define __KVM_IRQ_H - -#include - -static inline int irqchip_in_kernel(struct kvm *kvm) -{ - return 1; -} - -#endif diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bc491a73815c34803d56060d798f5620446bba8b..e4890e04b2108cdd484fb0288f4ae203797c3c17 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -209,6 +209,14 @@ unsigned int diag9c_forwarding_hz; module_param(diag9c_forwarding_hz, uint, 0644); MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); +/* + * allow asynchronous deinit for protected guests; enable by default since + * the feature is opt-in anyway + */ +static int async_destroy = 1; +module_param(async_destroy, int, 0444); +MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests"); + /* * For now we handle at most 16 double words as this is what the s390 base * kernel handles and stores in the prefix page. If we ever need to go beyond @@ -616,6 +624,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_BPB: r = test_facility(82); break; + case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE: + r = async_destroy && is_prot_virt_host(); + break; case KVM_CAP_S390_PROTECTED: r = is_prot_virt_host(); break; @@ -2519,9 +2530,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) { + const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM); + void __user *argp = (void __user *)cmd->data; int r = 0; u16 dummy; - void __user *argp = (void __user *)cmd->data; + + if (need_lock) + mutex_lock(&kvm->lock); switch (cmd->cmd) { case KVM_PV_ENABLE: { @@ -2555,6 +2570,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); break; } + case KVM_PV_ASYNC_CLEANUP_PREPARE: + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm) || !async_destroy) + break; + + r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); + /* + * If a CPU could not be destroyed, destroy VM will also fail. + * There is no point in trying to destroy it. Instead return + * the rc and rrc from the first CPU that failed destroying. + */ + if (r) + break; + r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc); + + /* no need to block service interrupts any more */ + clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); + break; + case KVM_PV_ASYNC_CLEANUP_PERFORM: + r = -EINVAL; + if (!async_destroy) + break; + /* kvm->lock must not be held; this is asserted inside the function. */ + r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc); + break; case KVM_PV_DISABLE: { r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) @@ -2568,7 +2608,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) */ if (r) break; - r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); + r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc); /* no need to block service interrupts any more */ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); @@ -2718,6 +2758,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) default: r = -ENOTTY; } + if (need_lock) + mutex_unlock(&kvm->lock); + return r; } @@ -2922,9 +2965,8 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EINVAL; break; } - mutex_lock(&kvm->lock); + /* must be called without kvm->lock */ r = kvm_s390_handle_pv(kvm, &args); - mutex_unlock(&kvm->lock); if (copy_to_user(argp, &args, sizeof(args))) { r = -EFAULT; break; @@ -3243,6 +3285,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_vsie_init(kvm); if (use_gisa) kvm_s390_gisa_init(kvm); + INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; @@ -3287,11 +3331,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) /* * We are already at the end of life and kvm->lock is not taken. * This is ok as the file descriptor is closed by now and nobody - * can mess with the pv state. To avoid lockdep_assert_held from - * complaining we do not use kvm_s390_pv_is_protected. + * can mess with the pv state. */ - if (kvm_s390_pv_get_handle(kvm)) - kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); + kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc); /* * Remove the mmu notifier only when the whole KVM VM is torn down, * and only if one was registered to begin with. If the VM is @@ -3344,28 +3386,30 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) static void sca_add_vcpu(struct kvm_vcpu *vcpu) { if (!kvm_s390_use_sca_entries()) { - struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca); /* we still need the basic sca for the ipte control */ - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; return; } read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); } read_unlock(&vcpu->kvm->arch.sca_lock); @@ -3396,6 +3440,7 @@ static int sca_switch_to_extended(struct kvm *kvm) struct kvm_vcpu *vcpu; unsigned long vcpu_idx; u32 scaol, scaoh; + phys_addr_t new_sca_phys; if (kvm->arch.use_esca) return 0; @@ -3404,8 +3449,9 @@ static int sca_switch_to_extended(struct kvm *kvm) if (!new_sca) return -ENOMEM; - scaoh = (u32)((u64)(new_sca) >> 32); - scaol = (u32)(u64)(new_sca) & ~0x3fU; + new_sca_phys = virt_to_phys(new_sca); + scaoh = new_sca_phys >> 32; + scaol = new_sca_phys & ESCA_SCAOL_MASK; kvm_s390_vcpu_block_all(kvm); write_lock(&kvm->arch.sca_lock); @@ -3625,15 +3671,18 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) { - free_page(vcpu->arch.sie_block->cbrlo); + free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo)); vcpu->arch.sie_block->cbrlo = 0; } int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) { - vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.sie_block->cbrlo) + void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + + if (!cbrlo_page) return -ENOMEM; + + vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page); return 0; } @@ -3643,7 +3692,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ibc = model->ibc; if (test_kvm_facility(vcpu->kvm, 7)) - vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; + vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list); } static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) @@ -3700,9 +3749,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); } - vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) - | SDNXC; - vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; + vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC; + vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb); if (sclp.has_kss) kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); @@ -3752,7 +3800,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return -ENOMEM; vcpu->arch.sie_block = &sie_page->sie_block; - vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb); /* the real guest size will always be smaller than msl */ vcpu->arch.sie_block->mso = 0; @@ -5169,6 +5217,7 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + void *sida_addr; int r = 0; if (mop->flags || !mop->size) @@ -5180,16 +5229,16 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, if (!kvm_s390_pv_cpu_is_protected(vcpu)) return -EINVAL; + sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset; + switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: - if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), mop->size)) + if (copy_to_user(uaddr, sida_addr, mop->size)) r = -EFAULT; break; case KVM_S390_MEMOP_SIDA_WRITE: - if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), uaddr, mop->size)) + if (copy_from_user(sida_addr, uaddr, mop->size)) r = -EFAULT; break; } @@ -5567,6 +5616,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return true; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 4755492dfabc65037608b84f2fdba29b4e8f66f6..d48588c207d8ba87dc3cb5fa37f7b11bbec299e9 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -23,7 +23,8 @@ /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 -#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) +#define IS_ITDB_VALID(vcpu) \ + ((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1)) extern debug_info_t *kvm_s390_dbf; extern debug_info_t *kvm_s390_dbf_uv; @@ -233,7 +234,7 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = (u32)(u64)kvm->arch.gisa_int.origin; + u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; @@ -243,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3335fa09b6f1d205a518beb3b84848225966b2e5..9f8a192bd750f9b4150779aabbaed2000c962cab 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -924,8 +924,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return -EREMOTE; } if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem, - PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE); rc = 0; } else { rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 7cb7799a0acb4000157554df8beec25634c6084b..e032ebbf51b976827e3c48934a72e49e90db111e 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -18,6 +18,29 @@ #include #include "kvm-s390.h" +/** + * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to + * be destroyed + * + * @list: list head for the list of leftover VMs + * @old_gmap_table: the gmap table of the leftover protected VM + * @handle: the handle of the leftover protected VM + * @stor_var: pointer to the variable storage of the leftover protected VM + * @stor_base: address of the base storage of the leftover protected VM + * + * Represents a protected VM that is still registered with the Ultravisor, + * but which does not correspond any longer to an active KVM VM. It should + * be destroyed at some point later, either asynchronously or when the + * process terminates. + */ +struct pv_vm_to_be_destroyed { + struct list_head list; + unsigned long old_gmap_table; + u64 handle; + void *stor_var; + unsigned long stor_base; +}; + static void kvm_s390_clear_pv_state(struct kvm *kvm) { kvm->arch.pv.handle = 0; @@ -44,7 +67,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); - free_page(sida_origin(vcpu->arch.sie_block)); + free_page((unsigned long)sida_addr(vcpu->arch.sie_block)); vcpu->arch.sie_block->pv_handle_cpu = 0; vcpu->arch.sie_block->pv_handle_config = 0; memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv)); @@ -66,6 +89,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) .header.cmd = UVC_CMD_CREATE_SEC_CPU, .header.len = sizeof(uvcb), }; + void *sida_addr; int cc; if (kvm_s390_pv_cpu_get_handle(vcpu)) @@ -79,16 +103,17 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) /* Input */ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm); uvcb.num = vcpu->arch.sie_block->icpua; - uvcb.state_origin = (u64)vcpu->arch.sie_block; - uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; + uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block); + uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base); /* Alloc Secure Instruction Data Area Designation */ - vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!vcpu->arch.sie_block->sidad) { + sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!sida_addr) { free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); return -ENOMEM; } + vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr); cc = uv_call(0, (u64)&uvcb); *rc = uvcb.header.rc; @@ -159,23 +184,192 @@ out_err: return -ENOMEM; } -/* this should not fail, but if it does, we must not free the donated memory */ -int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +/** + * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM. + * @kvm: the KVM that was associated with this leftover protected VM + * @leftover: details about the leftover protected VM that needs a clean up + * @rc: the RC code of the Destroy Secure Configuration UVC + * @rrc: the RRC code of the Destroy Secure Configuration UVC + * + * Destroy one leftover protected VM. + * On success, kvm->mm->context.protected_count will be decremented atomically + * and all other resources used by the VM will be freed. + * + * Return: 0 in case of success, otherwise 1 + */ +static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, + struct pv_vm_to_be_destroyed *leftover, + u16 *rc, u16 *rrc) { int cc; - cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), - UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + /* It used the destroy-fast UVC, nothing left to do here */ + if (!leftover->handle) + goto done_fast; + cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc); + if (cc) + return cc; + /* + * Intentionally leak unusable memory. If the UVC fails, the memory + * used for the VM and its metadata is permanently unusable. + * This can only happen in case of a serious KVM or hardware bug; it + * is not expected to happen in normal operation. + */ + free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len)); + free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER); + vfree(leftover->stor_var); +done_fast: + atomic_dec(&kvm->mm->context.protected_count); + return 0; +} + +/** + * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory. + * @kvm: the VM whose memory is to be cleared. + * + * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot. + * The CPUs of the protected VM need to be destroyed beforehand. + */ +static void kvm_s390_destroy_lower_2g(struct kvm *kvm) +{ + const unsigned long pages_2g = SZ_2G / PAGE_SIZE; + struct kvm_memory_slot *slot; + unsigned long len; + int srcu_idx; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + /* Take the memslot containing guest absolute address 0 */ + slot = gfn_to_memslot(kvm, 0); + /* Clear all slots or parts thereof that are below 2GB */ + while (slot && slot->base_gfn < pages_2g) { + len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE; + s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len); + /* Take the next memslot */ + slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages); + } + + srcu_read_unlock(&kvm->srcu, srcu_idx); +} + +static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct uv_cb_destroy_fast uvcb = { + .header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST, + .header.len = sizeof(uvcb), + .handle = kvm_s390_pv_get_handle(kvm), + }; + int cc; + + cc = uv_call_sched(0, (u64)&uvcb); + if (rc) + *rc = uvcb.header.rc; + if (rrc) + *rrc = uvcb.header.rrc; WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x", + uvcb.header.rc, uvcb.header.rrc); + WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x", + kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc); + /* Inteded memory leak on "impossible" error */ + if (!cc) + kvm_s390_pv_dealloc_vm(kvm); + return cc ? -EIO : 0; +} + +static inline bool is_destroy_fast_available(void) +{ + return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list); +} + +/** + * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown. + * @kvm: the VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Set aside the protected VM for a subsequent teardown. The VM will be able + * to continue immediately as a non-secure VM, and the information needed to + * properly tear down the protected VM is set aside. If another protected VM + * was already set aside without starting its teardown, this function will + * fail. + * The CPUs of the protected VM need to be destroyed beforehand. + * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, -EINVAL if another protected VM was already set + * aside, -ENOMEM if the system ran out of memory. + */ +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *priv; + int res = 0; + + lockdep_assert_held(&kvm->lock); /* - * if the mm still has a mapping, make all its pages accessible - * before destroying the guest + * If another protected VM was already prepared for teardown, refuse. + * A normal deinitialization has to be performed instead. */ - if (mmget_not_zero(kvm->mm)) { - s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); - mmput(kvm->mm); + if (kvm->arch.pv.set_aside) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + if (is_destroy_fast_available()) { + res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc); + } else { + priv->stor_var = kvm->arch.pv.stor_var; + priv->stor_base = kvm->arch.pv.stor_base; + priv->handle = kvm_s390_pv_get_handle(kvm); + priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + if (s390_replace_asce(kvm->arch.gmap)) + res = -ENOMEM; } + if (res) { + kfree(priv); + return res; + } + + kvm_s390_destroy_lower_2g(kvm); + kvm_s390_clear_pv_state(kvm); + kvm->arch.pv.set_aside = priv; + + *rc = UVC_RC_EXECUTED; + *rrc = 42; + return 0; +} + +/** + * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM + * @kvm: the KVM whose protected VM needs to be deinitialized + * @rc: the RC code of the UVC + * @rrc: the RRC code of the UVC + * + * Deinitialize the current protected VM. This function will destroy and + * cleanup the current protected VM, but it will not cleanup the guest + * memory. This function should only be called when the protected VM has + * just been created and therefore does not have any guest memory, or when + * the caller cleans up the guest memory separately. + * + * This function should not fail, but if it does, the donated memory must + * not be freed. + * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, otherwise -EIO + */ +int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + int cc; + + cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), + UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); if (!cc) { atomic_dec(&kvm->mm->context.protected_count); kvm_s390_pv_dealloc_vm(kvm); @@ -189,11 +383,137 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return cc ? -EIO : 0; } +/** + * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated + * with a specific KVM. + * @kvm: the KVM to be cleaned up + * @rc: the RC code of the first failing UVC + * @rrc: the RRC code of the first failing UVC + * + * This function will clean up all protected VMs associated with a KVM. + * This includes the active one, the one prepared for deinitialization with + * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list. + * + * Context: kvm->lock needs to be held unless being called from + * kvm_arch_destroy_vm. + * + * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO + */ +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *cur; + bool need_zap = false; + u16 _rc, _rrc; + int cc = 0; + + /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */ + atomic_inc(&kvm->mm->context.protected_count); + + *rc = 1; + /* If the current VM is protected, destroy it */ + if (kvm_s390_pv_get_handle(kvm)) { + cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc); + need_zap = true; + } + + /* If a previous protected VM was set aside, put it in the need_cleanup list */ + if (kvm->arch.pv.set_aside) { + list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; + } + + /* Cleanup all protected VMs in the need_cleanup list */ + while (!list_empty(&kvm->arch.pv.need_cleanup)) { + cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list); + need_zap = true; + if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) { + cc = 1; + /* + * Only return the first error rc and rrc, so make + * sure it is not overwritten. All destroys will + * additionally be reported via KVM_UV_EVENT(). + */ + if (*rc == UVC_RC_EXECUTED) { + *rc = _rc; + *rrc = _rrc; + } + } + list_del(&cur->list); + kfree(cur); + } + + /* + * If the mm still has a mapping, try to mark all its pages as + * accessible. The counter should not reach zero before this + * cleanup has been performed. + */ + if (need_zap && mmget_not_zero(kvm->mm)) { + s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); + mmput(kvm->mm); + } + + /* Now the counter can safely reach 0 */ + atomic_dec(&kvm->mm->context.protected_count); + return cc ? -EIO : 0; +} + +/** + * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM. + * @kvm: the VM previously associated with the protected VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Tear down the protected VM that had been previously prepared for teardown + * using kvm_s390_pv_set_aside_vm. Ideally this should be called by + * userspace asynchronously from a separate thread. + * + * Context: kvm->lock must not be held. + * + * Return: 0 in case of success, -EINVAL if no protected VM had been + * prepared for asynchronous teardowm, -EIO in case of other errors. + */ +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *p; + int ret = 0; + + lockdep_assert_not_held(&kvm->lock); + mutex_lock(&kvm->lock); + p = kvm->arch.pv.set_aside; + kvm->arch.pv.set_aside = NULL; + mutex_unlock(&kvm->lock); + if (!p) + return -EINVAL; + + /* When a fatal signal is received, stop immediately */ + if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX)) + goto done; + if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc)) + ret = -EIO; + kfree(p); + p = NULL; +done: + /* + * p is not NULL if we aborted because of a fatal signal, in which + * case queue the leftover for later cleanup. + */ + if (p) { + mutex_lock(&kvm->lock); + list_add(&p->list, &kvm->arch.pv.need_cleanup); + mutex_unlock(&kvm->lock); + /* Did not finish, but pretend things went well */ + *rc = UVC_RC_EXECUTED; + *rrc = 42; + } + return ret; +} + static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, struct mm_struct *mm) { struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier); u16 dummy; + int r; /* * No locking is needed since this is the last thread of the last user of this @@ -202,7 +522,9 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, * unregistered. This means that if this notifier runs, then the * struct kvm is still valid. */ - kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm)) + kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy); } static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = { @@ -226,8 +548,9 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */ uvcb.guest_stor_len = kvm->arch.pv.guest_len; uvcb.guest_asce = kvm->arch.gmap->asce; - uvcb.guest_sca = (unsigned long)kvm->arch.sca; - uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; + uvcb.guest_sca = virt_to_phys(kvm->arch.sca); + uvcb.conf_base_stor_origin = + virt_to_phys((void *)kvm->arch.pv.stor_base); uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; cc = uv_call_sched(0, (u64)&uvcb); diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index ace2541ababd383448ff654f917b66983f3b33a4..b6a0219e470a4a4ba4609c5fb2131a06d9ddc0c7 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -656,7 +656,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) page = gfn_to_page(kvm, gpa_to_gfn(gpa)); if (is_error_page(page)) return -EINVAL; - *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); + *hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK); return 0; } @@ -871,7 +871,7 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, WARN_ON_ONCE(rc); return 1; } - vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; + vsie_page->scb_o = phys_to_virt(hpa); return 0; } diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 8947451ae0210f35b7a584e4c3c7854972b95d88..74e1d873dce050fae2c1bc282498371ba3f981bb 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -72,7 +72,7 @@ static struct gmap *gmap_alloc(unsigned long limit) goto out_free; page->index = 0; list_add(&page->lru, &gmap->crst_list); - table = (unsigned long *) page_to_phys(page); + table = page_to_virt(page); crst_table_init(table, etype); gmap->table = table; gmap->asce = atype | _ASCE_TABLE_LENGTH | @@ -311,12 +311,12 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER); if (!page) return -ENOMEM; - new = (unsigned long *) page_to_phys(page); + new = page_to_virt(page); crst_table_init(new, init); spin_lock(&gmap->guest_table_lock); if (*table & _REGION_ENTRY_INVALID) { list_add(&page->lru, &gmap->crst_list); - *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + *table = __pa(new) | _REGION_ENTRY_LENGTH | (*table & _REGION_ENTRY_TYPE_MASK); page->index = gaddr; page = NULL; @@ -556,7 +556,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, gaddr & _REGION1_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; @@ -564,7 +564,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, gaddr & _REGION2_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; @@ -572,7 +572,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, gaddr & _REGION3_MASK)) return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); } table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; /* Walk the parent mm page table */ @@ -812,7 +812,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION2: table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT; @@ -820,7 +820,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_REGION3: table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT; @@ -828,7 +828,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + table = __va(*table & _REGION_ENTRY_ORIGIN); fallthrough; case _ASCE_TYPE_SEGMENT: table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT; @@ -836,7 +836,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, break; if (*table & _REGION_ENTRY_INVALID) return NULL; - table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN); + table = __va(*table & _SEGMENT_ENTRY_ORIGIN); table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT; } return table; @@ -1149,7 +1149,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val) if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) { address = pte_val(pte) & PAGE_MASK; address += gaddr & ~PAGE_MASK; - *val = *(unsigned long *) address; + *val = *(unsigned long *)__va(address); set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG))); /* Do *NOT* clear the _PAGE_INVALID bit! */ rc = 0; @@ -1334,7 +1334,8 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) { - unsigned long sto, *ste, *pgt; + unsigned long *ste; + phys_addr_t sto, pgt; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1342,13 +1343,13 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN)) return; gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1); - sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); + sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT)); gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr); - pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN); + pgt = *ste & _SEGMENT_ENTRY_ORIGIN; *ste = _SEGMENT_ENTRY_EMPTY; - __gmap_unshadow_pgt(sg, raddr, pgt); + __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ - page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + page = phys_to_page(pgt); list_del(&page->lru); page_table_free_pgste(page); } @@ -1364,19 +1365,19 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, unsigned long *sgt) { - unsigned long *pgt; struct page *page; + phys_addr_t pgt; int i; BUG_ON(!gmap_is_shadow(sg)); for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) { if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN)) continue; - pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN); + pgt = sgt[i] & _REGION_ENTRY_ORIGIN; sgt[i] = _SEGMENT_ENTRY_EMPTY; - __gmap_unshadow_pgt(sg, raddr, pgt); + __gmap_unshadow_pgt(sg, raddr, __va(pgt)); /* Free page table */ - page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT); + page = phys_to_page(pgt); list_del(&page->lru); page_table_free_pgste(page); } @@ -1391,7 +1392,8 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) { - unsigned long r3o, *r3e, *sgt; + unsigned long r3o, *r3e; + phys_addr_t sgt; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1400,12 +1402,12 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1); r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT)); - gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr); - sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr); + sgt = *r3e & _REGION_ENTRY_ORIGIN; *r3e = _REGION3_ENTRY_EMPTY; - __gmap_unshadow_sgt(sg, raddr, sgt); + __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ - page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + page = phys_to_page(sgt); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1421,19 +1423,19 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, unsigned long *r3t) { - unsigned long *sgt; struct page *page; + phys_addr_t sgt; int i; BUG_ON(!gmap_is_shadow(sg)); for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) { if (!(r3t[i] & _REGION_ENTRY_ORIGIN)) continue; - sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN); + sgt = r3t[i] & _REGION_ENTRY_ORIGIN; r3t[i] = _REGION3_ENTRY_EMPTY; - __gmap_unshadow_sgt(sg, raddr, sgt); + __gmap_unshadow_sgt(sg, raddr, __va(sgt)); /* Free segment table */ - page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT); + page = phys_to_page(sgt); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1448,7 +1450,8 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) { - unsigned long r2o, *r2e, *r3t; + unsigned long r2o, *r2e; + phys_addr_t r3t; struct page *page; BUG_ON(!gmap_is_shadow(sg)); @@ -1457,12 +1460,12 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1); r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT)); - gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr); - r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr); + r3t = *r2e & _REGION_ENTRY_ORIGIN; *r2e = _REGION2_ENTRY_EMPTY; - __gmap_unshadow_r3t(sg, raddr, r3t); + __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ - page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + page = phys_to_page(r3t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1478,7 +1481,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, unsigned long *r2t) { - unsigned long *r3t; + phys_addr_t r3t; struct page *page; int i; @@ -1486,11 +1489,11 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) { if (!(r2t[i] & _REGION_ENTRY_ORIGIN)) continue; - r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN); + r3t = r2t[i] & _REGION_ENTRY_ORIGIN; r2t[i] = _REGION2_ENTRY_EMPTY; - __gmap_unshadow_r3t(sg, raddr, r3t); + __gmap_unshadow_r3t(sg, raddr, __va(r3t)); /* Free region 3 table */ - page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT); + page = phys_to_page(r3t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1505,8 +1508,9 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr, */ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) { - unsigned long r1o, *r1e, *r2t; + unsigned long r1o, *r1e; struct page *page; + phys_addr_t r2t; BUG_ON(!gmap_is_shadow(sg)); r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */ @@ -1514,12 +1518,12 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) return; gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1); r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT)); - gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr); - r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN); + gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr); + r2t = *r1e & _REGION_ENTRY_ORIGIN; *r1e = _REGION1_ENTRY_EMPTY; - __gmap_unshadow_r2t(sg, raddr, r2t); + __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Free region 2 table */ - page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + page = phys_to_page(r2t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1535,22 +1539,23 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr) static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr, unsigned long *r1t) { - unsigned long asce, *r2t; + unsigned long asce; struct page *page; + phys_addr_t r2t; int i; BUG_ON(!gmap_is_shadow(sg)); - asce = (unsigned long) r1t | _ASCE_TYPE_REGION1; + asce = __pa(r1t) | _ASCE_TYPE_REGION1; for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) { if (!(r1t[i] & _REGION_ENTRY_ORIGIN)) continue; - r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN); - __gmap_unshadow_r2t(sg, raddr, r2t); + r2t = r1t[i] & _REGION_ENTRY_ORIGIN; + __gmap_unshadow_r2t(sg, raddr, __va(r2t)); /* Clear entry and flush translation r1t -> r2t */ gmap_idte_one(asce, raddr); r1t[i] = _REGION1_ENTRY_EMPTY; /* Free region 2 table */ - page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT); + page = phys_to_page(r2t); list_del(&page->lru); __free_pages(page, CRST_ALLOC_ORDER); } @@ -1572,7 +1577,7 @@ static void gmap_unshadow(struct gmap *sg) sg->removed = 1; gmap_call_notifier(sg, 0, -1UL); gmap_flush_tlb(sg); - table = (unsigned long *)(sg->asce & _ASCE_ORIGIN); + table = __va(sg->asce & _ASCE_ORIGIN); switch (sg->asce & _ASCE_TYPE_MASK) { case _ASCE_TYPE_REGION1: __gmap_unshadow_r1t(sg, 0, table); @@ -1747,7 +1752,8 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_r2t, *table; + unsigned long *table; + phys_addr_t s_r2t; struct page *page; int rc; @@ -1759,7 +1765,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, page->index = r2t & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_r2t = (unsigned long *) page_to_phys(page); + s_r2t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */ @@ -1774,9 +1780,9 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY); + crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH | + *table = s_r2t | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r2t & _REGION_ENTRY_PROTECT); @@ -1797,8 +1803,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 4); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_r2t) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -1831,7 +1836,8 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_r3t, *table; + unsigned long *table; + phys_addr_t s_r3t; struct page *page; int rc; @@ -1843,7 +1849,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, page->index = r3t & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_r3t = (unsigned long *) page_to_phys(page); + s_r3t = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */ @@ -1858,9 +1864,9 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY); + crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH | + *table = s_r3t | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= (r3t & _REGION_ENTRY_PROTECT); @@ -1881,8 +1887,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 3); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_r3t) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -1915,7 +1920,8 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, int fake) { unsigned long raddr, origin, offset, len; - unsigned long *s_sgt, *table; + unsigned long *table; + phys_addr_t s_sgt; struct page *page; int rc; @@ -1927,7 +1933,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, page->index = sgt & _REGION_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_sgt = (unsigned long *) page_to_phys(page); + s_sgt = page_to_phys(page); /* Install shadow region second table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */ @@ -1942,9 +1948,9 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, rc = -EAGAIN; /* Race with shadow */ goto out_free; } - crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY); + crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY); /* mark as invalid as long as the parent table is not protected */ - *table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH | + *table = s_sgt | _REGION_ENTRY_LENGTH | _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID; if (sg->edat_level >= 1) *table |= sgt & _REGION_ENTRY_PROTECT; @@ -1965,8 +1971,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 2); - if (!table || (*table & _REGION_ENTRY_ORIGIN) != - (unsigned long) s_sgt) + if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_REGION_ENTRY_INVALID; @@ -2039,8 +2044,9 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, int fake) { unsigned long raddr, origin; - unsigned long *s_pgt, *table; + unsigned long *table; struct page *page; + phys_addr_t s_pgt; int rc; BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE)); @@ -2051,7 +2057,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, page->index = pgt & _SEGMENT_ENTRY_ORIGIN; if (fake) page->index |= GMAP_SHADOW_FAKE_TABLE; - s_pgt = (unsigned long *) page_to_phys(page); + s_pgt = page_to_phys(page); /* Install shadow page table */ spin_lock(&sg->guest_table_lock); table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */ @@ -2084,8 +2090,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt, spin_lock(&sg->guest_table_lock); if (!rc) { table = gmap_table_walk(sg, saddr, 1); - if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != - (unsigned long) s_pgt) + if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt) rc = -EAGAIN; /* Race with unshadow */ else *table &= ~_SEGMENT_ENTRY_INVALID; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index 1a25d456d8657664f52149f68e0bdbbd0241a028..30ab55f868f6d87dbbb80c3d81b24105ee00de4c 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -141,25 +141,25 @@ void mark_rodata_ro(void) debug_checkwx(); } -int set_memory_encrypted(unsigned long addr, int numpages) +int set_memory_encrypted(unsigned long vaddr, int numpages) { int i; /* make specified pages unshared, (swiotlb, dma_free) */ for (i = 0; i < numpages; ++i) { - uv_remove_shared(addr); - addr += PAGE_SIZE; + uv_remove_shared(virt_to_phys((void *)vaddr)); + vaddr += PAGE_SIZE; } return 0; } -int set_memory_decrypted(unsigned long addr, int numpages) +int set_memory_decrypted(unsigned long vaddr, int numpages) { int i; /* make specified pages shared (swiotlb, dma_alloca) */ for (i = 0; i < numpages; ++i) { - uv_set_shared(addr); - addr += PAGE_SIZE; + uv_set_shared(virt_to_phys((void *)vaddr)); + vaddr += PAGE_SIZE; } return 0; } diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 32b3341fe9707ada1069ff0962a594b76c0e11c3..da985e0dc69a5c3722180f9106c69f2696f6f5e6 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -82,7 +82,6 @@ static int __init rng_init (void) sigio_broken(random_fd); hwrng.name = RNG_MODULE_NAME; hwrng.read = rng_dev_read; - hwrng.quality = 1024; err = hwrng_register(&hwrng); if (err) { diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c index acb55b302b14c34d7dfe14fb6ace9067d5441308..3ac220dafec4a506ac7c2ed993f2baa58043ba6a 100644 --- a/arch/um/drivers/virt-pci.c +++ b/arch/um/drivers/virt-pci.c @@ -97,7 +97,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev, } buf = get_cpu_var(um_pci_msg_bufs); - memcpy(buf, cmd, cmd_size); + if (buf) + memcpy(buf, cmd, cmd_size); if (posted) { u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC); @@ -182,6 +183,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, struct um_pci_message_buffer *buf; u8 *data; unsigned long ret = ULONG_MAX; + size_t bytes = sizeof(buf->data); if (!dev) return ULONG_MAX; @@ -189,7 +191,8 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, buf = get_cpu_var(um_pci_msg_bufs); data = buf->data; - memset(buf->data, 0xff, sizeof(buf->data)); + if (buf) + memset(data, 0xff, bytes); switch (size) { case 1: @@ -204,7 +207,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset, goto out; } - if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8)) + if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes)) goto out; switch (size) { diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index 8adf8e89b25588763799f12a676958a3f99e75ae..786b44dc20c98c14bdb704f7b34ae20137212eaa 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end) { } +void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi) +{ +} + void apply_alternatives(struct alt_instr *start, struct alt_instr *end) { } diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9f8c03b347f9e2a2a7159332268913882e42cc93..7577c3344930bad5d43061d78829cc6c9557e787 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -292,6 +292,8 @@ config X86 select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX + select FUNCTION_ALIGNMENT_16B if X86_64 || X86_ALIGNMENT_16 + select FUNCTION_ALIGNMENT_4B imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE @@ -359,11 +361,6 @@ config ARCH_HAS_CPU_RELAX config ARCH_HIBERNATION_POSSIBLE def_bool y -config ARCH_NR_GPIO - int - default 1024 if X86_64 - default 512 - config ARCH_SUSPEND_POSSIBLE def_bool y @@ -1855,7 +1852,7 @@ config CC_HAS_IBT config X86_KERNEL_IBT prompt "Indirect Branch Tracking" - bool + def_bool y depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL # https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f depends on !LD_IS_LLD || LLD_VERSION >= 140000 @@ -2492,6 +2489,46 @@ config CC_HAS_SLS config CC_HAS_RETURN_THUNK def_bool $(cc-option,-mfunction-return=thunk-extern) +config CC_HAS_ENTRY_PADDING + def_bool $(cc-option,-fpatchable-function-entry=16,16) + +config FUNCTION_PADDING_CFI + int + default 59 if FUNCTION_ALIGNMENT_64B + default 27 if FUNCTION_ALIGNMENT_32B + default 11 if FUNCTION_ALIGNMENT_16B + default 3 if FUNCTION_ALIGNMENT_8B + default 0 + +# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG +# except Kconfig can't do arithmetic :/ +config FUNCTION_PADDING_BYTES + int + default FUNCTION_PADDING_CFI if CFI_CLANG + default FUNCTION_ALIGNMENT + +config CALL_PADDING + def_bool n + depends on CC_HAS_ENTRY_PADDING && OBJTOOL + select FUNCTION_ALIGNMENT_16B + +config FINEIBT + def_bool y + depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE + select CALL_PADDING + +config HAVE_CALL_THUNKS + def_bool y + depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL + +config CALL_THUNKS + def_bool n + select CALL_PADDING + +config PREFIX_SYMBOLS + def_bool y + depends on CALL_PADDING && !CFI_CLANG + menuconfig SPECULATION_MITIGATIONS bool "Mitigations for speculative execution vulnerabilities" default y @@ -2543,6 +2580,37 @@ config CPU_UNRET_ENTRY help Compile the kernel with support for the retbleed=unret mitigation. +config CALL_DEPTH_TRACKING + bool "Mitigate RSB underflow with call depth tracking" + depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS + select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE + select CALL_THUNKS + default y + help + Compile the kernel with call depth tracking to mitigate the Intel + SKL Return-Speculation-Buffer (RSB) underflow issue. The + mitigation is off by default and needs to be enabled on the + kernel command line via the retbleed=stuff option. For + non-affected systems the overhead of this option is marginal as + the call depth tracking is using run-time generated call thunks + in a compiler generated padding area and call patching. This + increases text size by ~5%. For non affected systems this space + is unused. On affected SKL systems this results in a significant + performance gain over the IBRS mitigation. + +config CALL_THUNKS_DEBUG + bool "Enable call thunks and call depth tracking debugging" + depends on CALL_DEPTH_TRACKING + select FUNCTION_ALIGNMENT_32B + default n + help + Enable call/ret counters for imbalance detection and build in + a noisy dmesg about callthunks generation and call patching for + trouble shooting. The debug prints need to be enabled on the + kernel command line with 'debug-callthunks'. + Only enable this, when you are debugging call thunks as this + creates a noticable runtime overhead. If unsure say N. + config CPU_IBPB_ENTRY bool "Enable IBPB on kernel entry" depends on CPU_SUP_AMD && X86_64 diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 291c71e68ea71824d79d26e95202aec8e4fdb6b9..d2880075df174b1e9094d91791f6987af31d97e2 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -208,6 +208,12 @@ ifdef CONFIG_SLS KBUILD_CFLAGS += -mharden-sls=all endif +ifdef CONFIG_CALL_PADDING +PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES) +KBUILD_CFLAGS += $(PADDING_CFLAGS) +export PADDING_CFLAGS +endif + KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE) ifdef CONFIG_LTO_CLANG diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 3dc5db651dd0a1974d24b2a41c1c7490a1db8e7b..1acff356d97a9cd7f39a0955450819b1c67d24c1 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -68,7 +68,7 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info) # address by the bootloader. LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker) ifdef CONFIG_LD_ORPHAN_WARN -LDFLAGS_vmlinux += --orphan-handling=warn +LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) endif LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index d4c4281db635c57f3701b212815273a1e2f5c909..a75712991df3e93647a793f4bf404108ca60496a 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -37,6 +37,14 @@ #include #include "pgtable.h" +/* + * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result + * in assembly errors due to trying to move .org backward due to the excessive + * alignment. + */ +#undef __ALIGN +#define __ALIGN .balign 16, 0x90 + /* * Locally defined symbols should be marked hidden: */ diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 3b1d701a4f6c509841998e8b3699096e11ea7dce..3e7a329235bdb8290799a80af50ff296425f21b3 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -107,3 +107,6 @@ quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $< > $@ $(obj)/%.S: $(src)/%.pl FORCE $(call if_changed,perlasm) + +# Disable GCOV in odd or sensitive code +GCOV_PROFILE_curve25519-x86_64.o := n diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S index b48ddebb474894eecfb695efda35c74c5984c63d..cdf3215ec272ced263a234efab1dc754d240debd 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -7,6 +7,7 @@ */ #include +#include #include #define STATE0 %xmm0 @@ -402,7 +403,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad) * void crypto_aegis128_aesni_enc(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) FRAME_BEGIN cmp $0x10, LEN @@ -499,7 +500,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc) * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) FRAME_BEGIN /* load the state: */ @@ -556,7 +557,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) * void crypto_aegis128_aesni_dec(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) FRAME_BEGIN cmp $0x10, LEN @@ -653,7 +654,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_dec) * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) +SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) FRAME_BEGIN /* load the state: */ diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S index c75fd7d015ed8c958819f30d363b59b8b972321c..03ae4cd1d976a2e0fab0e18dc5b597545c4fadd1 100644 --- a/arch/x86/crypto/aria-aesni-avx-asm_64.S +++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S @@ -7,6 +7,7 @@ */ #include +#include #include /* struct aria_ctx: */ @@ -913,7 +914,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way) RET; SYM_FUNC_END(__aria_aesni_avx_crypt_16way) -SYM_FUNC_START(aria_aesni_avx_encrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -938,7 +939,7 @@ SYM_FUNC_START(aria_aesni_avx_encrypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_encrypt_16way) -SYM_FUNC_START(aria_aesni_avx_decrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -1039,7 +1040,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_ctr_gen_keystream_16way) RET; SYM_FUNC_END(__aria_aesni_avx_ctr_gen_keystream_16way) -SYM_FUNC_START(aria_aesni_avx_ctr_crypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way) /* input: * %rdi: ctx * %rsi: dst @@ -1208,7 +1209,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way) RET; SYM_FUNC_END(__aria_aesni_avx_gfni_crypt_16way) -SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -1233,7 +1234,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_gfni_encrypt_16way) -SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) /* input: * %rdi: ctx, CTX * %rsi: dst @@ -1258,7 +1259,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way) RET; SYM_FUNC_END(aria_aesni_avx_gfni_decrypt_16way) -SYM_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) +SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way) /* input: * %rdi: ctx * %rsi: dst diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index 2e1658ddbe1a903b05a07f2cbcece4a0ba50d238..4a30618281ec2e9e85af99f0d0e1ad6ec3dcaa72 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text -.align 8 SYM_FUNC_START_LOCAL(__camellia_enc_blk16) /* input: * %rdi: ctx, CTX @@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16) jmp .Lenc_done; SYM_FUNC_END(__camellia_enc_blk16) -.align 8 SYM_FUNC_START_LOCAL(__camellia_dec_blk16) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 0e4e9abbf4de365d8b979822abde3dab53e6163f..deaf62aa73a6b09cb002cb99562438206f43ef43 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -221,7 +221,6 @@ * Size optimization... with inlined roundsm32 binary would be over 5 times * larger and would only marginally faster. */ -.align 8 SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15, @@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c RET; SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd) -.align 8 SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3, %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11, @@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .text -.align 8 SYM_FUNC_START_LOCAL(__camellia_enc_blk32) /* input: * %rdi: ctx, CTX @@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32) jmp .Lenc_done; SYM_FUNC_END(__camellia_enc_blk32) -.align 8 SYM_FUNC_START_LOCAL(__camellia_dec_blk32) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index b258af420c92c7739fd59293585bed169bba11cd..0326a01503c3a554bf89482ed158eb7c8719c9ce 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -208,7 +208,6 @@ .text -.align 16 SYM_FUNC_START_LOCAL(__cast5_enc_blk16) /* input: * %rdi: ctx @@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16) RET; SYM_FUNC_END(__cast5_enc_blk16) -.align 16 SYM_FUNC_START_LOCAL(__cast5_dec_blk16) /* input: * %rdi: ctx diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 721474abfb719003625b68d4dd0c54b1cd9f14bf..5286db5b8165e63244d305304fe9286ba5dd8fc3 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -94,7 +94,6 @@ # # Assumes len >= 16. # -.align 16 SYM_FUNC_START(crc_t10dif_pcl) movdqa .Lbswap_mask(%rip), BSWAP_MASK diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S index 6a0b15e7196a8d711252482e725e5da1e3bc7d18..ef73a3ab87263e4637e5fcbbe92a11632c83fe42 100644 --- a/arch/x86/crypto/nh-avx2-x86_64.S +++ b/arch/x86/crypto/nh-avx2-x86_64.S @@ -8,6 +8,7 @@ */ #include +#include #define PASS0_SUMS %ymm0 #define PASS1_SUMS %ymm1 @@ -65,11 +66,11 @@ /* * void nh_avx2(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. */ -SYM_FUNC_START(nh_avx2) +SYM_TYPED_FUNC_START(nh_avx2) vmovdqu 0x00(KEY), K0 vmovdqu 0x10(KEY), K1 diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S index 34c567bbcb4faa9f570b648330cb3593f67efcd1..75fb994b6d177eb3f4f8fdea39182aec2cca6e1e 100644 --- a/arch/x86/crypto/nh-sse2-x86_64.S +++ b/arch/x86/crypto/nh-sse2-x86_64.S @@ -8,6 +8,7 @@ */ #include +#include #define PASS0_SUMS %xmm0 #define PASS1_SUMS %xmm1 @@ -67,11 +68,11 @@ /* * void nh_sse2(const u32 *key, const u8 *message, size_t message_len, - * u8 hash[NH_HASH_BYTES]) + * __le64 hash[NH_NUM_PASSES]) * * It's guaranteed that message_len % 16 == 0. */ -SYM_FUNC_START(nh_sse2) +SYM_TYPED_FUNC_START(nh_sse2) movdqu 0x00(KEY), K0 movdqu 0x10(KEY), K1 diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c index 8ea5ab0f1ca74bfa041e607ccc0d790d6166c42b..46b036204ed918dc047e51f5448941a546484418 100644 --- a/arch/x86/crypto/nhpoly1305-avx2-glue.c +++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_avx2(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_avx2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2); + crypto_nhpoly1305_update_helper(desc, src, n, nh_avx2); kernel_fpu_end(); src += n; srclen -= n; diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c index 2b353d42ed13f5c6bd58d02c747ae5daf2e51e75..4a4970d751076826a5ae52293c00ce1c128de52d 100644 --- a/arch/x86/crypto/nhpoly1305-sse2-glue.c +++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c @@ -14,14 +14,7 @@ #include asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len, - u8 hash[NH_HASH_BYTES]); - -/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */ -static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len, - __le64 hash[NH_NUM_PASSES]) -{ - nh_sse2(key, message, message_len, (u8 *)hash); -} + __le64 hash[NH_NUM_PASSES]); static int nhpoly1305_sse2_update(struct shash_desc *desc, const u8 *src, unsigned int srclen) @@ -33,7 +26,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc, unsigned int n = min_t(unsigned int, srclen, SZ_4K); kernel_fpu_begin(); - crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2); + crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2); kernel_fpu_end(); src += n; srclen -= n; diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl index 2077ce7a5647933478727ea1df764af5335ba62a..b9abcd79c1f43b89de38e57a888b22dcc5150b63 100644 --- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl +++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl @@ -108,7 +108,6 @@ if (!$kernel) { sub declare_function() { my ($name, $align, $nargs) = @_; if($kernel) { - $code .= ".align $align\n"; $code .= "SYM_FUNC_START($name)\n"; $code .= ".L$name:\n"; } else { diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 82f2313f512b8664a31d6744bd3225591eaaaca6..97e2836218518029fb5649cf253136b433ad1f8e 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -550,7 +550,6 @@ #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -.align 8 SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) /* input: * %rdi: ctx, CTX @@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx) RET; SYM_FUNC_END(__serpent_enc_blk8_avx) -.align 8 SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 8ea34c9b9316021609302bf353ce6318410399ac..6d60c50593a9b81c67e25a3b7ddf9e34001a639a 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -550,7 +550,6 @@ #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \ transpose_4x4(x0, x1, x2, x3, t0, t1, t2) -.align 8 SYM_FUNC_START_LOCAL(__serpent_enc_blk16) /* input: * %rdi: ctx, CTX @@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16) RET; SYM_FUNC_END(__serpent_enc_blk16) -.align 8 SYM_FUNC_START_LOCAL(__serpent_dec_blk16) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 2f94ec0e763bfefb088b7558b34c4cfefe177757..cade913d48822019fbf216144670ba86de964c41 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -54,6 +54,7 @@ */ #include +#include #define DIGEST_PTR %rdi /* 1st arg */ #define DATA_PTR %rsi /* 2nd arg */ @@ -92,8 +93,7 @@ * numBlocks: Number of blocks to process */ .text -.align 32 -SYM_FUNC_START(sha1_ni_transform) +SYM_TYPED_FUNC_START(sha1_ni_transform) push %rbp mov %rsp, %rbp sub $FRAME_SIZE, %rsp diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S index 263f916362e025ead9d059c81017e3974be263ae..f54988c80eb405126de5e56ad87e747663d5d563 100644 --- a/arch/x86/crypto/sha1_ssse3_asm.S +++ b/arch/x86/crypto/sha1_ssse3_asm.S @@ -25,6 +25,7 @@ */ #include +#include #define CTX %rdi // arg1 #define BUF %rsi // arg2 @@ -67,7 +68,7 @@ * param: function's name */ .macro SHA1_VECTOR_ASM name - SYM_FUNC_START(\name) + SYM_TYPED_FUNC_START(\name) push %rbx push %r12 diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 3baa1ec390974a86064819234f66c199af1169ba..5555b5d5215a449eff7624f1505829036b917139 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -48,6 +48,7 @@ ######################################################################## #include +#include ## assume buffers not aligned #define VMOVDQ vmovdqu @@ -346,8 +347,7 @@ a = TMP_ ## arg 3 : Num blocks ######################################################################## .text -SYM_FUNC_START(sha256_transform_avx) -.align 32 +SYM_TYPED_FUNC_START(sha256_transform_avx) pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 9bcdbc47b8b4beeee9f8caaf7a825cd09bea8311..3eada94168526665bbe79920886d70ace9938db0 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -49,6 +49,7 @@ ######################################################################## #include +#include ## assume buffers not aligned #define VMOVDQ vmovdqu @@ -523,8 +524,7 @@ STACK_SIZE = _CTX + _CTX_SIZE ## arg 3 : Num blocks ######################################################################## .text -SYM_FUNC_START(sha256_transform_rorx) -.align 32 +SYM_TYPED_FUNC_START(sha256_transform_rorx) pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index c4a5db612c3276380d914bbe65170811ddc1e7d1..959288eecc6898917164bdbe9d2ba5896ad8cebf 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -47,6 +47,7 @@ ######################################################################## #include +#include ## assume buffers not aligned #define MOVDQ movdqu @@ -355,8 +356,7 @@ a = TMP_ ## arg 3 : Num blocks ######################################################################## .text -SYM_FUNC_START(sha256_transform_ssse3) -.align 32 +SYM_TYPED_FUNC_START(sha256_transform_ssse3) pushq %rbx pushq %r12 pushq %r13 diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 94d50dd27cb532edb5c89fd7e939372fa275eb7c..537b6dcd7ed80ef1f285019e87f27cdba0634e58 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -54,6 +54,7 @@ */ #include +#include #define DIGEST_PTR %rdi /* 1st arg */ #define DATA_PTR %rsi /* 2nd arg */ @@ -96,8 +97,7 @@ */ .text -.align 32 -SYM_FUNC_START(sha256_ni_transform) +SYM_TYPED_FUNC_START(sha256_ni_transform) shl $6, NUM_BLKS /* convert to bytes */ jz .Ldone_hash diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 1fefe6dd3a9e2f90de0deff603b04bafa08dc5c2..b0984f19fdb408e952773793fa0244f2a50ff8c9 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -48,6 +48,7 @@ ######################################################################## #include +#include .text @@ -273,7 +274,7 @@ frame_size = frame_WK + WK_SIZE # of SHA512 message blocks. # "blocks" is the message length in SHA512 blocks ######################################################################## -SYM_FUNC_START(sha512_transform_avx) +SYM_TYPED_FUNC_START(sha512_transform_avx) test msglen, msglen je nowork diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 5cdaab7d6901544a868f95bb0cf894cb8bbe8f3a..b1ca99055ef994f5301a53b2e1ccfc7754e7f03e 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -50,6 +50,7 @@ ######################################################################## #include +#include .text @@ -565,7 +566,7 @@ frame_size = frame_CTX + CTX_SIZE # of SHA512 message blocks. # "blocks" is the message length in SHA512 blocks ######################################################################## -SYM_FUNC_START(sha512_transform_rorx) +SYM_TYPED_FUNC_START(sha512_transform_rorx) # Save GPRs push %rbx push %r12 diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index b84c22e06c5f7987f8b458810b23e96127f6e7b0..c06afb5270e5f5789ba0d45c948a5f68ac4235f3 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -48,6 +48,7 @@ ######################################################################## #include +#include .text @@ -274,7 +275,7 @@ frame_size = frame_WK + WK_SIZE # of SHA512 message blocks. # "blocks" is the message length in SHA512 blocks. ######################################################################## -SYM_FUNC_START(sha512_transform_ssse3) +SYM_TYPED_FUNC_START(sha512_transform_ssse3) test msglen, msglen je nowork diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S index b12b9efb5ec5142dea9da76087e1dd84625aa96a..503bab450a9157200bc752dceaf03b9bbd020886 100644 --- a/arch/x86/crypto/sm3-avx-asm_64.S +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -12,6 +12,7 @@ */ #include +#include #include /* Context structure */ @@ -327,8 +328,7 @@ * void sm3_transform_avx(struct sm3_state *state, * const u8 *data, int nblocks); */ -.align 16 -SYM_FUNC_START(sm3_transform_avx) +SYM_TYPED_FUNC_START(sm3_transform_avx) /* input: * %rdi: ctx, CTX * %rsi: data (64*nblks bytes) diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S index 4767ab61ff489bebf206cd2a6ddea678b3d2304b..e2668d2fe6ce181650807a8fa5abef864b76bd1b 100644 --- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S @@ -14,6 +14,7 @@ */ #include +#include #include #define rRIP (%rip) @@ -139,13 +140,11 @@ .text -.align 16 /* * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst, * const u8 *src, int nblocks) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_crypt4) /* input: * %rdi: round key array, CTX @@ -249,7 +248,6 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4) RET; SYM_FUNC_END(sm4_aesni_avx_crypt4) -.align 8 SYM_FUNC_START_LOCAL(__sm4_crypt_blk8) /* input: * %rdi: round key array, CTX @@ -363,7 +361,6 @@ SYM_FUNC_END(__sm4_crypt_blk8) * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst, * const u8 *src, int nblocks) */ -.align 8 SYM_FUNC_START(sm4_aesni_avx_crypt8) /* input: * %rdi: round key array, CTX @@ -419,8 +416,7 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8) * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 -SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) +SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8) /* input: * %rdi: round key array, CTX * %rsi: dst (8 blocks) @@ -494,8 +490,7 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8) * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 -SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) +SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8) /* input: * %rdi: round key array, CTX * %rsi: dst (8 blocks) @@ -544,8 +539,7 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8) * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 -SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) +SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8) /* input: * %rdi: round key array, CTX * %rsi: dst (8 blocks) diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S index 4732fe8bb65b6579e2ce8a4489dd7df8e89be74c..98ede94592877c4bb9f2b40614eb34f2055246aa 100644 --- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S @@ -14,6 +14,7 @@ */ #include +#include #include #define rRIP (%rip) @@ -153,9 +154,6 @@ .long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef .text -.align 16 - -.align 8 SYM_FUNC_START_LOCAL(__sm4_crypt_blk16) /* input: * %rdi: round key array, CTX @@ -281,8 +279,7 @@ SYM_FUNC_END(__sm4_crypt_blk16) * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 -SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) +SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16) /* input: * %rdi: round key array, CTX * %rsi: dst (16 blocks) @@ -394,8 +391,7 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16) * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 -SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) +SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16) /* input: * %rdi: round key array, CTX * %rsi: dst (16 blocks) @@ -448,8 +444,7 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16) * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst, * const u8 *src, u8 *iv) */ -.align 8 -SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) +SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16) /* input: * %rdi: round key array, CTX * %rsi: dst (16 blocks) diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index 31f9b2ec3857d4d083a0d7b895cbf93c709cc134..12fde271cd3f6b2f20e80aff87c73658f9a7f5d3 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -228,7 +228,6 @@ vpxor x2, wkey, x2; \ vpxor x3, wkey, x3; -.align 8 SYM_FUNC_START_LOCAL(__twofish_enc_blk8) /* input: * %rdi: ctx, CTX @@ -270,7 +269,6 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8) RET; SYM_FUNC_END(__twofish_enc_blk8) -.align 8 SYM_FUNC_START_LOCAL(__twofish_dec_blk8) /* input: * %rdi: ctx, CTX diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c index f9c4adc274040a605718890689e018e81affd422..0614beece27932df8a1af3fd8a7f64e99b6d0db2 100644 --- a/arch/x86/crypto/twofish_glue.c +++ b/arch/x86/crypto/twofish_glue.c @@ -38,8 +38,8 @@ * Third Edition. */ +#include #include -#include #include #include #include diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index e309e7156038935ab92cd025616c5c765e3a5214..91397f58ac3008556fc0cfbb2437eee45da706ff 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -1181,7 +1181,7 @@ SYM_CODE_START(asm_exc_nmi) * is using the thread stack right now, so it's safe for us to use it. */ movl %esp, %ebx - movl PER_CPU_VAR(cpu_current_top_of_stack), %esp + movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp call exc_nmi movl %ebx, %esp @@ -1243,7 +1243,7 @@ SYM_CODE_START(rewind_stack_and_make_dead) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp - movl PER_CPU_VAR(cpu_current_top_of_stack), %esi + movl PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi leal -TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp call make_task_dead diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 9953d966d12443115bc7ecb3d5c083c9b5c8847f..15739a2c09833ba30ac8b2eb6a9be9e93608f7ea 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64) /* tss.sp2 is scratch space. */ movq %rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -252,7 +252,7 @@ SYM_FUNC_START(__switch_to_asm) #ifdef CONFIG_STACKPROTECTOR movq TASK_stack_canary(%rsi), %rbx - movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset + movq %rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary #endif /* @@ -284,9 +284,11 @@ SYM_FUNC_END(__switch_to_asm) * r12: kernel thread arg */ .pushsection .text, "ax" -SYM_CODE_START(ret_from_fork) + __FUNC_ALIGN +SYM_CODE_START_NOALIGN(ret_from_fork) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR // copy_thread + CALL_DEPTH_ACCOUNT movq %rax, %rdi call schedule_tail /* rdi: 'prev' task parameter */ @@ -326,11 +328,12 @@ SYM_CODE_END(ret_from_fork) #endif .endm -SYM_CODE_START_LOCAL(xen_error_entry) +SYM_CODE_START(xen_error_entry) + ANNOTATE_NOENDBR UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL RET SYM_CODE_END(xen_error_entry) @@ -600,13 +603,13 @@ SYM_CODE_END(\asmsym) * shared between 32 and 64 bit and emit the __irqentry_text_* markers * so the stacktrace boundary checks work. */ - .align 16 + __ALIGN .globl __irqentry_text_start __irqentry_text_start: #include - .align 16 + __ALIGN .globl __irqentry_text_end __irqentry_text_end: ANNOTATE_NOENDBR @@ -828,7 +831,8 @@ EXPORT_SYMBOL(asm_load_gs_index) * * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs) */ -SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback) + __FUNC_ALIGN +SYM_CODE_START_LOCAL_NOALIGN(exc_xen_hypervisor_callback) /* * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will @@ -856,7 +860,8 @@ SYM_CODE_END(exc_xen_hypervisor_callback) * We distinguish between categories by comparing each saved segment register * with its current contents: any discrepancy means we in category 1. */ -SYM_CODE_START(xen_failsafe_callback) + __FUNC_ALIGN +SYM_CODE_START_NOALIGN(xen_failsafe_callback) UNWIND_HINT_EMPTY ENDBR movl %ds, %ecx @@ -903,7 +908,8 @@ SYM_CODE_END(xen_failsafe_callback) * R14 - old CR3 * R15 - old SPEC_CTRL */ -SYM_CODE_START_LOCAL(paranoid_entry) +SYM_CODE_START(paranoid_entry) + ANNOTATE_NOENDBR UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 ENCODE_FRAME_POINTER 8 @@ -972,7 +978,7 @@ SYM_CODE_START_LOCAL(paranoid_entry) * CR3 above, keep the old value in a callee saved register. */ IBRS_ENTER save_reg=%r15 - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL RET SYM_CODE_END(paranoid_entry) @@ -1038,7 +1044,8 @@ SYM_CODE_END(paranoid_exit) /* * Switch GS and CR3 if needed. */ -SYM_CODE_START_LOCAL(error_entry) +SYM_CODE_START(error_entry) + ANNOTATE_NOENDBR UNWIND_HINT_FUNC PUSH_AND_CLEAR_REGS save_ret=1 @@ -1056,14 +1063,11 @@ SYM_CODE_START_LOCAL(error_entry) /* We have user CR3. Change to kernel CR3. */ SWITCH_TO_KERNEL_CR3 scratch_reg=%rax IBRS_ENTER - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ -.Lerror_entry_from_usermode_after_swapgs: - /* Put us onto the real thread stack. */ - call sync_regs - RET + jmp sync_regs /* * There are two places in the kernel that can potentially fault with @@ -1094,6 +1098,7 @@ SYM_CODE_START_LOCAL(error_entry) */ .Lerror_entry_done_lfence: FENCE_SWAPGS_KERNEL_ENTRY + CALL_DEPTH_ACCOUNT leaq 8(%rsp), %rax /* return pt_regs pointer */ ANNOTATE_UNRET_END RET @@ -1112,7 +1117,7 @@ SYM_CODE_START_LOCAL(error_entry) FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rax IBRS_ENTER - UNTRAIN_RET + UNTRAIN_RET_FROM_CALL /* * Pretend that the exception came from user mode: set up pt_regs @@ -1121,7 +1126,7 @@ SYM_CODE_START_LOCAL(error_entry) leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */ call fixup_bad_iret mov %rax, %rdi - jmp .Lerror_entry_from_usermode_after_swapgs + jmp sync_regs SYM_CODE_END(error_entry) SYM_CODE_START_LOCAL(error_return) @@ -1206,7 +1211,7 @@ SYM_CODE_START(asm_exc_nmi) FENCE_SWAPGS_USER_ENTRY SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx movq %rsp, %rdx - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp UNWIND_HINT_IRET_REGS base=%rdx offset=8 pushq 5*8(%rdx) /* pt_regs->ss */ pushq 4*8(%rdx) /* pt_regs->rsp */ @@ -1516,12 +1521,13 @@ SYM_CODE_END(ignore_sysret) #endif .pushsection .text, "ax" -SYM_CODE_START(rewind_stack_and_make_dead) + __FUNC_ALIGN +SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) UNWIND_HINT_FUNC /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp - movq PER_CPU_VAR(cpu_current_top_of_stack), %rax + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax leaq -PTREGS_SIZE(%rax), %rsp UNWIND_HINT_REGS diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 59b93901660db94092d1e881d58affe919234673..70150298f8bdf5ea1cfa9f895a0443b521e5b60b 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -58,7 +58,7 @@ SYM_CODE_START(entry_SYSENTER_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rax popq %rax - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp /* Construct struct pt_regs on stack */ pushq $__USER_DS /* pt_regs->ss */ @@ -128,7 +128,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) popfq jmp .Lsysenter_flags_fixed SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL) - ANNOTATE_NOENDBR // is_sysenter_singlestep SYM_CODE_END(entry_SYSENTER_compat) /* @@ -191,7 +190,7 @@ SYM_CODE_START(entry_SYSCALL_compat) SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp /* Switch to the kernel stack */ - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL) ANNOTATE_NOENDBR @@ -332,7 +331,7 @@ SYM_CODE_START(entry_INT80_compat) ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV movq %rsp, %rax - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp + movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp pushq 5*8(%rax) /* regs->ss */ pushq 4*8(%rax) /* regs->rsp */ diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index f38b07d2768bbfc186bf5df4963768166dc738f8..5e37f41e5f14d483283c8dc81f30463ff228be4b 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -11,7 +11,7 @@ /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func -SYM_FUNC_START_NOALIGN(\name) +SYM_FUNC_START(\name) pushq %rbp movq %rsp, %rbp @@ -36,7 +36,7 @@ SYM_FUNC_END(\name) EXPORT_SYMBOL(preempt_schedule_thunk) EXPORT_SYMBOL(preempt_schedule_notrace_thunk) -SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore) +SYM_CODE_START_LOCAL(__thunk_restore) popq %r11 popq %r10 popq %r9 diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 3e88b9df8c8f137104566f4fb7c4b3d27ef03f09..838613ac15b8205ef7d982d2186b371c8b0fb1b6 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -33,11 +33,12 @@ vobjs32-y += vdso32/vclock_gettime.o vobjs-$(CONFIG_X86_SGX) += vsgx.o # files to link into kernel -obj-y += vma.o extable.o -KASAN_SANITIZE_vma.o := y -UBSAN_SANITIZE_vma.o := y -KCSAN_SANITIZE_vma.o := y -OBJECT_FILES_NON_STANDARD_vma.o := n +obj-y += vma.o extable.o +KASAN_SANITIZE_vma.o := y +UBSAN_SANITIZE_vma.o := y +KCSAN_SANITIZE_vma.o := y +OBJECT_FILES_NON_STANDARD_vma.o := n +OBJECT_FILES_NON_STANDARD_extable.o := n # vDSO images to build vdso_img-$(VDSO64-y) += 64 @@ -94,7 +95,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),) endif endif -$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) +$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL) $(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO # @@ -157,6 +158,7 @@ KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32)) +KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32)) KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic KBUILD_CFLAGS_32 += -fno-stack-protector KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls) diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 017baba56b01923bf974c4d756590828d55e72e7..1f21f576ca77fee6040a4d555eeac87ec706d638 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1603,10 +1603,8 @@ clear_arch_lbr: * x86_perf_get_lbr - get the LBR records information * * @lbr: the caller's memory to store the LBR records information - * - * Returns: 0 indicates the LBR info has been successfully obtained */ -int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) { int lbr_fmt = x86_pmu.intel_cap.lbr_format; @@ -1614,8 +1612,6 @@ int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) lbr->from = x86_pmu.lbr_from; lbr->to = x86_pmu.lbr_to; lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0; - - return 0; } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 9542c582d546b2ae270f3568794c57de4478d0b4..7659217f4d492efe7badbd5cf5d8e866c12833f2 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -78,8 +78,43 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end); extern void apply_retpolines(s32 *start, s32 *end); extern void apply_returns(s32 *start, s32 *end); extern void apply_ibt_endbr(s32 *start, s32 *end); +extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine, + s32 *start_cfi, s32 *end_cfi); struct module; +struct paravirt_patch_site; + +struct callthunk_sites { + s32 *call_start, *call_end; + struct paravirt_patch_site *pv_start, *pv_end; +}; + +#ifdef CONFIG_CALL_THUNKS +extern void callthunks_patch_builtin_calls(void); +extern void callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod); +extern void *callthunks_translate_call_dest(void *dest); +extern bool is_callthunk(void *addr); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); +#else +static __always_inline void callthunks_patch_builtin_calls(void) {} +static __always_inline void +callthunks_patch_module_calls(struct callthunk_sites *sites, + struct module *mod) {} +static __always_inline void *callthunks_translate_call_dest(void *dest) +{ + return dest; +} +static __always_inline bool is_callthunk(void *addr) +{ + return false; +} +static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, + void *func) +{ + return 0; +} +#endif #ifdef CONFIG_SMP extern void alternatives_smp_module_add(struct module *mod, char *name, @@ -347,6 +382,7 @@ static inline int alternatives_text_reserved(void *start, void *end) #define old_len 141b-140b #define new_len1 144f-143f #define new_len2 145f-144f +#define new_len3 146f-145f /* * gas compatible max based on the idea from: @@ -354,7 +390,8 @@ static inline int alternatives_text_reserved(void *start, void *end) * * The additional "-" is needed because gas uses a "true" value of -1. */ -#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) +#define alt_max_2(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b))))) +#define alt_max_3(a, b, c) (alt_max_2(alt_max_2(a, b), c)) /* @@ -366,13 +403,36 @@ static inline int alternatives_text_reserved(void *start, void *end) 140: \oldinstr 141: - .skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \ - (alt_max_short(new_len1, new_len2) - (old_len)),0x90 + .skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \ + (alt_max_2(new_len1, new_len2) - (old_len)),0x90 +142: + + .pushsection .altinstructions,"a" + altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f + altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f + .popsection + + .pushsection .altinstr_replacement,"ax" +143: + \newinstr1 +144: + \newinstr2 +145: + .popsection +.endm + +.macro ALTERNATIVE_3 oldinstr, newinstr1, feature1, newinstr2, feature2, newinstr3, feature3 +140: + \oldinstr +141: + .skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \ + (alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90 142: .pushsection .altinstructions,"a" altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f + altinstruction_entry 140b,145f,\feature3,142b-140b,146f-145f .popsection .pushsection .altinstr_replacement,"ax" @@ -381,6 +441,8 @@ static inline int alternatives_text_reserved(void *start, void *end) 144: \newinstr2 145: + \newinstr3 +146: .popsection .endm diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index c9f4730bb113f06b247d42b1e5aee2a45fdf4ef0..61012476d66e0e131939f2146d248e1215b7b152 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -305,13 +305,15 @@ #define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */ #define X86_FEATURE_RSB_VMEXIT_LITE (11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */ #define X86_FEATURE_SGX_EDECCSSA (11*32+18) /* "" SGX EDECCSSA user leaf function */ - - +#define X86_FEATURE_CALL_DEPTH (11*32+19) /* "" Call depth tracking for RSB stuffing */ #define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* "" MSR IA32_TSX_CTRL (Intel) implemented */ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ +#define X86_FEATURE_CMPCCXADD (12*32+ 7) /* "" CMPccXADD instructions */ +#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ +#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h index 3e204e6140b51fb9567276c9886d21a56aafceed..a1168e7b69e5b747276b61ac62b755ad748e5534 100644 --- a/arch/x86/include/asm/current.h +++ b/arch/x86/include/asm/current.h @@ -3,16 +3,42 @@ #define _ASM_X86_CURRENT_H #include -#include #ifndef __ASSEMBLY__ + +#include +#include + struct task_struct; -DECLARE_PER_CPU(struct task_struct *, current_task); +struct pcpu_hot { + union { + struct { + struct task_struct *current_task; + int preempt_count; + int cpu_number; +#ifdef CONFIG_CALL_DEPTH_TRACKING + u64 call_depth; +#endif + unsigned long top_of_stack; + void *hardirq_stack_ptr; + u16 softirq_pending; +#ifdef CONFIG_X86_64 + bool hardirq_stack_inuse; +#else + void *softirq_stack_ptr; +#endif + }; + u8 pad[64]; + }; +}; +static_assert(sizeof(struct pcpu_hot) == 64); + +DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot); static __always_inline struct task_struct *get_current(void) { - return this_cpu_read_stable(current_task); + return this_cpu_read_stable(pcpu_hot.current_task); } #define current get_current() diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index cfdf307ddc0129019f64efebc01bf5a5159578fa..b049d950612fdd7c01b0508adcb9d8014ad9545f 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -2,8 +2,8 @@ #ifndef _ASM_X86_DEBUGREG_H #define _ASM_X86_DEBUGREG_H - #include +#include #include DECLARE_PER_CPU(unsigned long, cpu_dr7); diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index c862552d7d6d92e3b48e06ba4dcbe1e7f0065575..c44b56f7ffba0d0e4bf881c035a3a3668be9929e 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -69,6 +69,12 @@ # define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31)) #endif +#ifdef CONFIG_CALL_DEPTH_TRACKING +# define DISABLE_CALL_DEPTH_TRACKING 0 +#else +# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) +#endif + #ifdef CONFIG_INTEL_IOMMU_SVM # define DISABLE_ENQCMD 0 #else @@ -107,7 +113,8 @@ #define DISABLED_MASK8 (DISABLE_XENPV|DISABLE_TDX_GUEST) #define DISABLED_MASK9 (DISABLE_SGX) #define DISABLED_MASK10 0 -#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET) +#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ + DISABLE_CALL_DEPTH_TRACKING) #define DISABLED_MASK12 0 #define DISABLED_MASK13 0 #define DISABLED_MASK14 0 diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 275e7fd20310f7a237d2d22f790a2c0928c170a9..66837b8c67f1a9f794f9b65008bace6278f1e3d3 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -3,9 +3,9 @@ #define _ASM_X86_HARDIRQ_H #include +#include typedef struct { - u16 __softirq_pending; #if IS_ENABLED(CONFIG_KVM_INTEL) u8 kvm_cpu_l1tf_flush_l1d; #endif @@ -60,6 +60,7 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu); extern u64 arch_irq_stat(void); #define arch_irq_stat arch_irq_stat +#define local_softirq_pending_ref pcpu_hot.softirq_pending #if IS_ENABLED(CONFIG_KVM_INTEL) static inline void kvm_set_cpu_l1tf_flush_l1d(void) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 6d9368ea3701ca11b441a7324ba6cceed2e66308..08e822bd7aa601314980dfada7688407df539cbb 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -61,6 +61,8 @@ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) /* Support for debug MSRs available */ #define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) +/* Support for extended gva ranges for flush hypercalls available */ +#define HV_FEATURE_EXT_GVA_RANGES_FLUSH BIT(14) /* * Support for returning hypercall output block via XMM * registers is available @@ -607,6 +609,41 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF +/* + * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by + * pairing it with architecturally impossible exit reasons. Bit 28 is set only + * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit + * is pending. I.e. it will never be set by hardware for non-SMI exits (there + * are only three), nor will it ever be set unless the VMM is an STM. + */ +#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031 + +/* + * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose + * SVM enlightenments to guests. + */ +struct hv_vmcb_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB. + */ +#define HV_VMCB_NESTED_ENLIGHTENMENTS 31 + +/* Synthetic VM-Exit */ +#define HV_SVM_EXITCODE_ENL 0xf0000000 +#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1) + struct hv_partition_assist_pg { u32 tlb_lock_count; }; diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index 147cb8fdda92e9e0a9a2e74bb876968af8ba29dc..798183867d78969b2a7fd7cc3169cf310926fa8e 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -116,7 +116,7 @@ ASM_CALL_ARG2 #define call_on_irqstack(func, asm_call, argconstr...) \ - call_on_stack(__this_cpu_read(hardirq_stack_ptr), \ + call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr), \ func, asm_call, argconstr) /* Macros to assert type correctness for run_*_on_irqstack macros */ @@ -135,7 +135,7 @@ * User mode entry and interrupt on the irq stack do not \ * switch stacks. If from user mode the task stack is empty. \ */ \ - if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) { \ + if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \ irq_enter_rcu(); \ func(c_args); \ irq_exit_rcu(); \ @@ -146,9 +146,9 @@ * places. Invoke the stack switch macro with the call \ * sequence which matches the above direct invocation. \ */ \ - __this_cpu_write(hardirq_stack_inuse, true); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ call_on_irqstack(func, asm_call, constr); \ - __this_cpu_write(hardirq_stack_inuse, false); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ } \ } @@ -212,9 +212,9 @@ */ #define do_softirq_own_stack() \ { \ - __this_cpu_write(hardirq_stack_inuse, true); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, true); \ call_on_irqstack(__do_softirq, ASM_CALL_ARG0); \ - __this_cpu_write(hardirq_stack_inuse, false); \ + __this_cpu_write(pcpu_hot.hardirq_stack_inuse, false); \ } #endif diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h index 82ba4a564e5875abd05f5e1a34e76473eb20a109..abccd51dcfca1b5b6848a1b481797fbaa043870e 100644 --- a/arch/x86/include/asm/kvm-x86-ops.h +++ b/arch/x86/include/asm/kvm-x86-ops.h @@ -110,10 +110,12 @@ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) KVM_X86_OP_OPTIONAL(set_hv_timer) KVM_X86_OP_OPTIONAL(cancel_hv_timer) KVM_X86_OP(setup_mce) +#ifdef CONFIG_KVM_SMM KVM_X86_OP(smi_allowed) KVM_X86_OP(enter_smm) KVM_X86_OP(leave_smm) KVM_X86_OP(enable_smi_window) +#endif KVM_X86_OP_OPTIONAL(mem_enc_ioctl) KVM_X86_OP_OPTIONAL(mem_enc_register_region) KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) @@ -123,7 +125,7 @@ KVM_X86_OP_OPTIONAL(guest_memory_reclaimed) KVM_X86_OP(get_msr_feature) KVM_X86_OP(can_emulate_instruction) KVM_X86_OP(apic_init_signal_blocked) -KVM_X86_OP_OPTIONAL(enable_direct_tlbflush) +KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush) KVM_X86_OP_OPTIONAL(migrate_timers) KVM_X86_OP(msr_filter_changed) KVM_X86_OP(complete_emulated_msr) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f05ebaa26f0ff52a43294ea0c9e60175037c123e..f35f1ff4427bb85273abab37fe40a9154bf11cd9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -81,7 +82,9 @@ #define KVM_REQ_NMI KVM_ARCH_REQ(9) #define KVM_REQ_PMU KVM_ARCH_REQ(10) #define KVM_REQ_PMI KVM_ARCH_REQ(11) +#ifdef CONFIG_KVM_SMM #define KVM_REQ_SMI KVM_ARCH_REQ(12) +#endif #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) #define KVM_REQ_MCLOCK_INPROGRESS \ KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) @@ -108,6 +111,8 @@ KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \ KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_TLB_FLUSH \ + KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -204,6 +209,7 @@ typedef enum exit_fastpath_completion fastpath_t; struct x86_emulate_ctxt; struct x86_exception; +union kvm_smram; enum x86_intercept; enum x86_intercept_stage; @@ -253,16 +259,16 @@ enum x86_intercept_stage; #define PFERR_GUEST_PAGE_BIT 33 #define PFERR_IMPLICIT_ACCESS_BIT 48 -#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT) -#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT) -#define PFERR_USER_MASK (1U << PFERR_USER_BIT) -#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT) -#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT) -#define PFERR_PK_MASK (1U << PFERR_PK_BIT) -#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT) -#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT) -#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT) -#define PFERR_IMPLICIT_ACCESS (1ULL << PFERR_IMPLICIT_ACCESS_BIT) +#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT) +#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT) +#define PFERR_USER_MASK BIT(PFERR_USER_BIT) +#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT) +#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT) +#define PFERR_PK_MASK BIT(PFERR_PK_BIT) +#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT) +#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT) +#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT) +#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT) #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK | \ PFERR_WRITE_MASK | \ @@ -488,17 +494,19 @@ enum pmc_type { struct kvm_pmc { enum pmc_type type; u8 idx; + bool is_paused; + bool intr; u64 counter; + u64 prev_counter; u64 eventsel; struct perf_event *perf_event; struct kvm_vcpu *vcpu; /* + * only for creating or reusing perf_event, * eventsel value for general purpose counters, * ctrl value for fixed counters. */ u64 current_config; - bool is_paused; - bool intr; }; /* More counters may conflict with other existing Architectural MSRs */ @@ -524,7 +532,16 @@ struct kvm_pmu { struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; - DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + + /* + * Overlay the bitmap with a 64-bit atomic so that all bits can be + * set in a single access, e.g. to reprogram all counters when the PMU + * filter changes. + */ + union { + DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); + atomic64_t __reprogram_pmi; + }; DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX); @@ -602,6 +619,29 @@ struct kvm_vcpu_hv_synic { bool dont_zero_synic_pages; }; +/* The maximum number of entries on the TLB flush fifo. */ +#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16) +/* + * Note: the following 'magic' entry is made up by KVM to avoid putting + * anything besides GVA on the TLB flush fifo. It is theoretically possible + * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000 + * which will look identical. KVM's action to 'flush everything' instead of + * flushing these particular addresses is, however, fully legitimate as + * flushing more than requested is always OK. + */ +#define KVM_HV_TLB_FLUSHALL_ENTRY ((u64)-1) + +enum hv_tlb_flush_fifos { + HV_L1_TLB_FLUSH_FIFO, + HV_L2_TLB_FLUSH_FIFO, + HV_NR_TLB_FLUSH_FIFOS, +}; + +struct kvm_vcpu_hv_tlb_flush_fifo { + spinlock_t write_lock; + DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE); +}; + /* Hyper-V per vcpu emulation context */ struct kvm_vcpu_hv { struct kvm_vcpu *vcpu; @@ -623,6 +663,19 @@ struct kvm_vcpu_hv { u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */ u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */ } cpuid_cache; + + struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS]; + + /* Preallocated buffer for handling hypercalls passing sparse vCPU set */ + u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS]; + + struct hv_vp_assist_page vp_assist_page; + + struct { + u64 pa_page_gpa; + u64 vm_id; + u32 vp_id; + } nested; }; /* Xen HVM per vcpu emulation context */ @@ -633,6 +686,7 @@ struct kvm_vcpu_xen { struct gfn_to_pfn_cache vcpu_info_cache; struct gfn_to_pfn_cache vcpu_time_info_cache; struct gfn_to_pfn_cache runstate_cache; + struct gfn_to_pfn_cache runstate2_cache; u64 last_steal; u64 runstate_entry_time; u64 runstate_times[4]; @@ -1059,6 +1113,7 @@ struct msr_bitmap_range { struct kvm_xen { u32 xen_version; bool long_mode; + bool runstate_update_flag; u8 upcall_vector; struct gfn_to_pfn_cache shinfo_cache; struct idr evtchn_ports; @@ -1156,7 +1211,18 @@ struct kvm_arch { struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES]; struct list_head active_mmu_pages; struct list_head zapped_obsolete_pages; - struct list_head lpage_disallowed_mmu_pages; + /* + * A list of kvm_mmu_page structs that, if zapped, could possibly be + * replaced by an NX huge page. A shadow page is on this list if its + * existence disallows an NX huge page (nx_huge_page_disallowed is set) + * and there are no other conditions that prevent a huge page, e.g. + * the backing host page is huge, dirtly logging is not enabled for its + * memslot, etc... Note, zapping shadow pages on this list doesn't + * guarantee an NX huge page will be created in its stead, e.g. if the + * guest attempts to execute from the region then KVM obviously can't + * create an NX huge page (without hanging the guest). + */ + struct list_head possible_nx_huge_pages; struct kvm_page_track_notifier_node mmu_sp_tracker; struct kvm_page_track_notifier_head track_notifier_head; /* @@ -1272,7 +1338,7 @@ struct kvm_arch { bool sgx_provisioning_allowed; struct kvm_pmu_event_filter __rcu *pmu_event_filter; - struct task_struct *nx_lpage_recovery_thread; + struct task_struct *nx_huge_page_recovery_thread; #ifdef CONFIG_X86_64 /* @@ -1284,6 +1350,9 @@ struct kvm_arch { */ bool tdp_mmu_enabled; + /* The number of TDP MMU pages across all roots. */ + atomic64_t tdp_mmu_pages; + /* * List of kvm_mmu_page structs being used as roots. * All kvm_mmu_page structs in the list should have @@ -1304,21 +1373,13 @@ struct kvm_arch { */ struct list_head tdp_mmu_roots; - /* - * List of kvm_mmu_page structs not being used as roots. - * All kvm_mmu_page structs in the list should have - * tdp_mmu_page set and a tdp_mmu_root_count of 0. - */ - struct list_head tdp_mmu_pages; - /* * Protects accesses to the following fields when the MMU lock * is held in read mode: * - tdp_mmu_roots (above) - * - tdp_mmu_pages (above) * - the link field of kvm_mmu_page structs used by the TDP MMU - * - lpage_disallowed_mmu_pages - * - the lpage_disallowed_link field of kvm_mmu_page structs used + * - possible_nx_huge_pages; + * - the possible_nx_huge_page_link field of kvm_mmu_page structs used * by the TDP MMU * It is acceptable, but not necessary, to acquire this lock when * the thread holds the MMU lock in write mode. @@ -1612,10 +1673,12 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); +#ifdef CONFIG_KVM_SMM int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); - int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); - int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); + int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); + int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram); void (*enable_smi_window)(struct kvm_vcpu *vcpu); +#endif int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); @@ -1630,7 +1693,7 @@ struct kvm_x86_ops { void *insn, int insn_len); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); - int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); + int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu); void (*migrate_timers)(struct kvm_vcpu *vcpu); void (*msr_filter_changed)(struct kvm_vcpu *vcpu); @@ -1663,6 +1726,7 @@ struct kvm_x86_nested_ops { int (*enable_evmcs)(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); + void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu); }; struct kvm_x86_init_ops { @@ -1844,6 +1908,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu); int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu); void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); +void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg); void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); @@ -1909,8 +1974,6 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu, void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu); gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); -gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, - struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception); gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, @@ -1994,14 +2057,18 @@ enum { #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ + +#ifdef CONFIG_KVM_SMM #define HF_SMM_MASK (1 << 6) #define HF_SMM_INSIDE_NMI_MASK (1 << 7) -#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE -#define KVM_ADDRESS_SPACE_NUM 2 - -#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) -#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE +# define KVM_ADDRESS_SPACE_NUM 2 +# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) +#else +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) +#endif #define KVM_ARCH_WANT_MMU_NOTIFIER @@ -2089,14 +2156,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu) #endif } -#define put_smstate(type, buf, offset, val) \ - *(type *)((buf) + (offset) - 0x7e00) = val - -#define GET_SMSTATE(type, buf, offset) \ - (*(type *)((buf) + (offset) - 0x7e00)) - -int kvm_cpu_dirty_log_size(void); - int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); #define KVM_CLOCK_VALID_FLAGS \ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index f484d656d34ee0a8ae607d85c3f586ca608c5d90..dd9b8118f7848e279cf1fec9247f814812e8ce5f 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -12,13 +12,26 @@ #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0))) #endif /* CONFIG_X86_32 */ -#ifdef __ASSEMBLY__ - -#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16) -#define __ALIGN .p2align 4, 0x90 +#define __ALIGN .balign CONFIG_FUNCTION_ALIGNMENT, 0x90; #define __ALIGN_STR __stringify(__ALIGN) + +#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) +#define FUNCTION_PADDING .skip CONFIG_FUNCTION_ALIGNMENT, 0x90; +#else +#define FUNCTION_PADDING +#endif + +#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO) +# define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING +#else +# define __FUNC_ALIGN __ALIGN #endif +#define ASM_FUNC_ALIGN __stringify(__FUNC_ALIGN) +#define SYM_F_ALIGN __FUNC_ALIGN + +#ifdef __ASSEMBLY__ + #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) #define RET jmp __x86_return_thunk #else /* CONFIG_RETPOLINE */ @@ -43,11 +56,45 @@ #endif /* __ASSEMBLY__ */ +/* + * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the + * CFI symbol layout changes. + * + * Without CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .skip FUNCTION_PADDING, 0x90 + * .byte 0xb8 + * .long __kcfi_typeid_##name + * name: + * + * With CALL_THUNKS: + * + * .align FUNCTION_ALIGNMENT + * __cfi_##name: + * .byte 0xb8 + * .long __kcfi_typeid_##name + * .skip FUNCTION_PADDING, 0x90 + * name: + * + * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized. + */ + +#ifdef CONFIG_CALL_PADDING +#define CFI_PRE_PADDING +#define CFI_POST_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#else +#define CFI_PRE_PADDING .skip CONFIG_FUNCTION_PADDING_BYTES, 0x90; +#define CFI_POST_PADDING +#endif + #define __CFI_TYPE(name) \ SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE) \ - .fill 11, 1, 0x90 ASM_NL \ + CFI_PRE_PADDING \ .byte 0xb8 ASM_NL \ .long __kcfi_typeid_##name ASM_NL \ + CFI_POST_PADDING \ SYM_FUNC_END(__cfi_##name) /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */ @@ -57,7 +104,7 @@ /* SYM_FUNC_START -- use for global functions */ #define SYM_FUNC_START(name) \ - SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN) \ ENDBR /* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */ @@ -67,7 +114,7 @@ /* SYM_FUNC_START_LOCAL -- use for local functions */ #define SYM_FUNC_START_LOCAL(name) \ - SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN) \ ENDBR /* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */ @@ -77,7 +124,7 @@ /* SYM_FUNC_START_WEAK -- use for weak functions */ #define SYM_FUNC_START_WEAK(name) \ - SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN) \ ENDBR /* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index dfdb103ae4f6ff993f6b1fea9a6bd4fe942ec12a..771b0a2b7a34721185fb9eef37c8f0aeefcf3f45 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -12,8 +12,104 @@ #include #include #include +#include -#define RETPOLINE_THUNK_SIZE 32 +/* + * Call depth tracking for Intel SKL CPUs to address the RSB underflow + * issue in software. + * + * The tracking does not use a counter. It uses uses arithmetic shift + * right on call entry and logical shift left on return. + * + * The depth tracking variable is initialized to 0x8000.... when the call + * depth is zero. The arithmetic shift right sign extends the MSB and + * saturates after the 12th call. The shift count is 5 for both directions + * so the tracking covers 12 nested calls. + * + * Call + * 0: 0x8000000000000000 0x0000000000000000 + * 1: 0xfc00000000000000 0xf000000000000000 + * ... + * 11: 0xfffffffffffffff8 0xfffffffffffffc00 + * 12: 0xffffffffffffffff 0xffffffffffffffe0 + * + * After a return buffer fill the depth is credited 12 calls before the + * next stuffing has to take place. + * + * There is a inaccuracy for situations like this: + * + * 10 calls + * 5 returns + * 3 calls + * 4 returns + * 3 calls + * .... + * + * The shift count might cause this to be off by one in either direction, + * but there is still a cushion vs. the RSB depth. The algorithm does not + * claim to be perfect and it can be speculated around by the CPU, but it + * is considered that it obfuscates the problem enough to make exploitation + * extremly difficult. + */ +#define RET_DEPTH_SHIFT 5 +#define RSB_RET_STUFF_LOOPS 16 +#define RET_DEPTH_INIT 0x8000000000000000ULL +#define RET_DEPTH_INIT_FROM_CALL 0xfc00000000000000ULL +#define RET_DEPTH_CREDIT 0xffffffffffffffffULL + +#ifdef CONFIG_CALL_THUNKS_DEBUG +# define CALL_THUNKS_DEBUG_INC_CALLS \ + incq %gs:__x86_call_count; +# define CALL_THUNKS_DEBUG_INC_RETS \ + incq %gs:__x86_ret_count; +# define CALL_THUNKS_DEBUG_INC_STUFFS \ + incq %gs:__x86_stuffs_count; +# define CALL_THUNKS_DEBUG_INC_CTXSW \ + incq %gs:__x86_ctxsw_count; +#else +# define CALL_THUNKS_DEBUG_INC_CALLS +# define CALL_THUNKS_DEBUG_INC_RETS +# define CALL_THUNKS_DEBUG_INC_STUFFS +# define CALL_THUNKS_DEBUG_INC_CTXSW +#endif + +#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS) + +#include + +#define CREDIT_CALL_DEPTH \ + movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#define ASM_CREDIT_CALL_DEPTH \ + movq $-1, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#define RESET_CALL_DEPTH \ + mov $0x80, %rax; \ + shl $56, %rax; \ + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); + +#define RESET_CALL_DEPTH_FROM_CALL \ + mov $0xfc, %rax; \ + shl $56, %rax; \ + movq %rax, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS + +#define INCREMENT_CALL_DEPTH \ + sarq $5, %gs:pcpu_hot + X86_call_depth; \ + CALL_THUNKS_DEBUG_INC_CALLS + +#define ASM_INCREMENT_CALL_DEPTH \ + sarq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth); \ + CALL_THUNKS_DEBUG_INC_CALLS + +#else +#define CREDIT_CALL_DEPTH +#define ASM_CREDIT_CALL_DEPTH +#define RESET_CALL_DEPTH +#define INCREMENT_CALL_DEPTH +#define ASM_INCREMENT_CALL_DEPTH +#define RESET_CALL_DEPTH_FROM_CALL +#endif /* * Fill the CPU return stack buffer. @@ -32,6 +128,7 @@ * from C via asm(".include ") but let's not go there. */ +#define RETPOLINE_THUNK_SIZE 32 #define RSB_CLEAR_LOOPS 32 /* To forcibly overwrite all entries */ /* @@ -60,7 +157,9 @@ dec reg; \ jnz 771b; \ /* barrier for jnz misprediction */ \ - lfence; + lfence; \ + ASM_CREDIT_CALL_DEPTH \ + CALL_THUNKS_DEBUG_INC_CTXSW #else /* * i386 doesn't unconditionally have LFENCE, as such it can't @@ -185,11 +284,32 @@ * where we have a stack but before any RET instruction. */ .macro UNTRAIN_RET -#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_CALL_DEPTH_TRACKING) ANNOTATE_UNRET_END - ALTERNATIVE_2 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ - "call entry_ibpb", X86_FEATURE_ENTRY_IBPB + ALTERNATIVE_3 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ + __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH +#endif +.endm + +.macro UNTRAIN_RET_FROM_CALL +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_CALL_DEPTH_TRACKING) + ANNOTATE_UNRET_END + ALTERNATIVE_3 "", \ + CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ + __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH +#endif +.endm + + +.macro CALL_DEPTH_ACCOUNT +#ifdef CONFIG_CALL_DEPTH_TRACKING + ALTERNATIVE "", \ + __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif .endm @@ -203,11 +323,45 @@ typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE]; extern retpoline_thunk_t __x86_indirect_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; +extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; extern void __x86_return_thunk(void); extern void zen_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_CALL_THUNKS +extern void (*x86_return_thunk)(void); +#else +#define x86_return_thunk (&__x86_return_thunk) +#endif + +#ifdef CONFIG_CALL_DEPTH_TRACKING +extern void __x86_return_skl(void); + +static inline void x86_set_skl_return_thunk(void) +{ + x86_return_thunk = &__x86_return_skl; +} + +#define CALL_DEPTH_ACCOUNT \ + ALTERNATIVE("", \ + __stringify(INCREMENT_CALL_DEPTH), \ + X86_FEATURE_CALL_DEPTH) + +#ifdef CONFIG_CALL_THUNKS_DEBUG +DECLARE_PER_CPU(u64, __x86_call_count); +DECLARE_PER_CPU(u64, __x86_ret_count); +DECLARE_PER_CPU(u64, __x86_stuffs_count); +DECLARE_PER_CPU(u64, __x86_ctxsw_count); +#endif +#else +static inline void x86_set_skl_return_thunk(void) {} + +#define CALL_DEPTH_ACCOUNT "" + +#endif + #ifdef CONFIG_RETPOLINE #define GEN(reg) \ @@ -215,6 +369,16 @@ extern void entry_ibpb(void); #include #undef GEN +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg; +#include +#undef GEN + +#define GEN(reg) \ + extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg; +#include +#undef GEN + #ifdef CONFIG_X86_64 /* diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 2a0b8dd4ec33553c30584f520bed2f818f85ce62..73e9522db7c155eeb627f8c1907e39c805aa1786 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -4,13 +4,13 @@ /* Various instructions on x86 need to be replaced for * para-virtualization: those hooks are defined here. */ +#include + #ifdef CONFIG_PARAVIRT #include #include #include -#include - #ifndef __ASSEMBLY__ #include #include @@ -665,6 +665,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu); asm(".pushsection " section ", \"ax\";" \ ".globl " PV_THUNK_NAME(func) ";" \ ".type " PV_THUNK_NAME(func) ", @function;" \ + ASM_FUNC_ALIGN \ PV_THUNK_NAME(func) ":" \ ASM_ENDBR \ FRAME_BEGIN \ @@ -730,6 +731,18 @@ static __always_inline unsigned long arch_local_irq_save(void) #undef PVOP_VCALL4 #undef PVOP_CALL4 +#define DEFINE_PARAVIRT_ASM(func, instr, sec) \ + asm (".pushsection " #sec ", \"ax\"\n" \ + ".global " #func "\n\t" \ + ".type " #func ", @function\n\t" \ + ASM_FUNC_ALIGN "\n" \ + #func ":\n\t" \ + ASM_ENDBR \ + instr "\n\t" \ + ASM_RET \ + ".size " #func ", . - " #func "\n\t" \ + ".popsection") + extern void default_banner(void); #else /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index f72bf0f6ccf71fe628f39a1aeef2f5041a61fa33..8c1da419260f96040e5e63a64c576126c35eee4b 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -2,6 +2,24 @@ #ifndef _ASM_X86_PARAVIRT_TYPES_H #define _ASM_X86_PARAVIRT_TYPES_H +#ifndef __ASSEMBLY__ +/* These all sit in the .parainstructions section to tell us what to patch. */ +struct paravirt_patch_site { + u8 *instr; /* original instructions */ + u8 type; /* type of this instruction */ + u8 len; /* length of original instruction */ +}; + +/* Lazy mode for batching updates / context switch */ +enum paravirt_lazy_mode { + PARAVIRT_LAZY_NONE, + PARAVIRT_LAZY_MMU, + PARAVIRT_LAZY_CPU, +}; +#endif + +#ifdef CONFIG_PARAVIRT + #ifndef __ASSEMBLY__ #include @@ -534,13 +552,6 @@ int paravirt_disable_iospace(void); __PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2), \ PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4)) -/* Lazy mode for batching updates / context switch */ -enum paravirt_lazy_mode { - PARAVIRT_LAZY_NONE, - PARAVIRT_LAZY_MMU, - PARAVIRT_LAZY_CPU, -}; - enum paravirt_lazy_mode paravirt_get_lazy_mode(void); void paravirt_start_context_switch(struct task_struct *prev); void paravirt_end_context_switch(struct task_struct *next); @@ -556,16 +567,9 @@ unsigned long paravirt_ret0(void); #define paravirt_nop ((void *)_paravirt_nop) -/* These all sit in the .parainstructions section to tell us what to patch. */ -struct paravirt_patch_site { - u8 *instr; /* original instructions */ - u8 type; /* type of this instruction */ - u8 len; /* length of original instruction */ -}; - extern struct paravirt_patch_site __parainstructions[], __parainstructions_end[]; #endif /* __ASSEMBLY__ */ - +#endif /* CONFIG_PARAVIRT */ #endif /* _ASM_X86_PARAVIRT_TYPES_H */ diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 9ac46dbe57d48fc3d6eb53471ef725899917f1d7..5d0f6891ae611aa069710b85585bee9467720114 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -543,12 +543,12 @@ static inline void perf_check_microcode(void) { } #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); -extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr); +extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr); #else struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data); -static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr) +static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) { - return -1; + memset(lbr, 0, sizeof(*lbr)); } #endif diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index 5f6daea1ee24839756899a9041fa5b939cc838c5..2d13f25b1bd8f332b860f5bd48b6f47102ca0dae 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -4,11 +4,11 @@ #include #include +#include + #include #include -DECLARE_PER_CPU(int, __preempt_count); - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 @@ -24,7 +24,7 @@ DECLARE_PER_CPU(int, __preempt_count); */ static __always_inline int preempt_count(void) { - return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED; + return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED; } static __always_inline void preempt_count_set(int pc) @@ -32,10 +32,10 @@ static __always_inline void preempt_count_set(int pc) int old, new; do { - old = raw_cpu_read_4(__preempt_count); + old = raw_cpu_read_4(pcpu_hot.preempt_count); new = (old & PREEMPT_NEED_RESCHED) | (pc & ~PREEMPT_NEED_RESCHED); - } while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old); + } while (raw_cpu_cmpxchg_4(pcpu_hot.preempt_count, old, new) != old); } /* @@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc) #define init_task_preempt_count(p) do { } while (0) #define init_idle_preempt_count(p, cpu) do { \ - per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \ + per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \ } while (0) /* @@ -58,17 +58,17 @@ static __always_inline void preempt_count_set(int pc) static __always_inline void set_preempt_need_resched(void) { - raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED); + raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED); } static __always_inline void clear_preempt_need_resched(void) { - raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED); + raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED); } static __always_inline bool test_preempt_need_resched(void) { - return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED); + return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED); } /* @@ -77,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void) static __always_inline void __preempt_count_add(int val) { - raw_cpu_add_4(__preempt_count, val); + raw_cpu_add_4(pcpu_hot.preempt_count, val); } static __always_inline void __preempt_count_sub(int val) { - raw_cpu_add_4(__preempt_count, -val); + raw_cpu_add_4(pcpu_hot.preempt_count, -val); } /* @@ -92,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var])); + return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e, + __percpu_arg([var])); } /* @@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void) */ static __always_inline bool should_resched(int preempt_offset) { - return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset); + return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset); } #ifdef CONFIG_PREEMPTION diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 6836c64b9819e589eedb08f5bd47d255ea95b05d..4e35c66edeb7dc028cccb9737769704d18fb2096 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -377,8 +377,6 @@ struct irq_stack { char stack[IRQ_STACK_SIZE]; } __aligned(IRQ_STACK_SIZE); -DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); - #ifdef CONFIG_X86_64 struct fixed_percpu_data { /* @@ -401,8 +399,6 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu); } -DECLARE_PER_CPU(void *, hardirq_stack_ptr); -DECLARE_PER_CPU(bool, hardirq_stack_inuse); extern asmlinkage void ignore_sysret(void); /* Save actual FS/GS selectors and bases to current->thread */ @@ -411,8 +407,6 @@ void current_save_fsgs(void); #ifdef CONFIG_STACKPROTECTOR DECLARE_PER_CPU(unsigned long, __stack_chk_guard); #endif -DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); -DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr); #endif /* !X86_64 */ struct perf_event; @@ -517,7 +511,7 @@ static __always_inline unsigned long current_top_of_stack(void) * and around vm86 mode and sp0 on x86_64 is special because of the * entry trampoline. */ - return this_cpu_read_stable(cpu_current_top_of_stack); + return this_cpu_read_stable(pcpu_hot.top_of_stack); } static __always_inline bool on_thread_stack(void) @@ -554,10 +548,9 @@ extern int sysenter_setup(void); /* Defined in head.S */ extern struct desc_ptr early_gdt_descr; -extern void switch_to_new_gdt(int); +extern void switch_gdt_and_percpu_base(int); extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); -extern void load_percpu_segment(int); extern void cpu_init(void); extern void cpu_init_secondary(void); extern void cpu_init_exception_handling(void); diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index dbb38a6b4dfb6315da4fb345cf82a3b28feeaf80..42b17cf10b10e31d17ae1a02b3c00d193e46e009 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -14,8 +14,6 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); #define __pv_queued_spin_unlock __pv_queued_spin_unlock -#define PV_UNLOCK "__raw_callee_save___pv_queued_spin_unlock" -#define PV_UNLOCK_SLOWPATH "__raw_callee_save___pv_queued_spin_unlock_slowpath" /* * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock @@ -37,32 +35,27 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text"); * rsi = lockval (second argument) * rdx = internal variable (set to 0) */ -asm (".pushsection .spinlock.text, \"ax\";" - ".globl " PV_UNLOCK ";" - ".type " PV_UNLOCK ", @function;" - ".align 4,0x90;" - PV_UNLOCK ": " - ASM_ENDBR - FRAME_BEGIN - "push %rdx;" - "mov $0x1,%eax;" - "xor %edx,%edx;" - LOCK_PREFIX "cmpxchg %dl,(%rdi);" - "cmp $0x1,%al;" - "jne .slowpath;" - "pop %rdx;" +#define PV_UNLOCK_ASM \ + FRAME_BEGIN \ + "push %rdx\n\t" \ + "mov $0x1,%eax\n\t" \ + "xor %edx,%edx\n\t" \ + LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t" \ + "cmp $0x1,%al\n\t" \ + "jne .slowpath\n\t" \ + "pop %rdx\n\t" \ + FRAME_END \ + ASM_RET \ + ".slowpath:\n\t" \ + "push %rsi\n\t" \ + "movzbl %al,%esi\n\t" \ + "call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t" \ + "pop %rsi\n\t" \ + "pop %rdx\n\t" \ FRAME_END - ASM_RET - ".slowpath: " - "push %rsi;" - "movzbl %al,%esi;" - "call " PV_UNLOCK_SLOWPATH ";" - "pop %rsi;" - "pop %rdx;" - FRAME_END - ASM_RET - ".size " PV_UNLOCK ", .-" PV_UNLOCK ";" - ".popsection"); + +DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock, + PV_UNLOCK_ASM, .spinlock.text); #else /* CONFIG_64BIT */ diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index a73bced40e2419d0cec28c1fe8327db24342f1d1..b4dbb20dab1a11d509b4f702f4bde6a6269b4d18 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -3,10 +3,10 @@ #define _ASM_X86_SMP_H #ifndef __ASSEMBLY__ #include -#include -#include #include +#include +#include extern int smp_num_siblings; extern unsigned int num_processors; @@ -19,7 +19,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id); DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id); -DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid); DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid); @@ -150,11 +149,10 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r); /* * This function is needed by all SMP systems. It must _always_ be valid - * from the initial startup. We map APIC_BASE very early in page_setup(), - * so this is correct in the x86 case. + * from the initial startup. */ -#define raw_smp_processor_id() this_cpu_read(cpu_number) -#define __smp_processor_id() __this_cpu_read(cpu_number) +#define raw_smp_processor_id() this_cpu_read(pcpu_hot.cpu_number) +#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number) #ifdef CONFIG_X86_32 extern int safe_smp_processor_id(void); diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 0361626841bc00da38c9dd1713e6995914d29e5c..cb1ee53ad3b18900a6945b11bb4e267d5d77cd6e 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -5,6 +5,8 @@ #include #include +#include + /* * 32-bit intercept words in the VMCB Control Area, starting * at Byte offset 000h. @@ -161,7 +163,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area { * Offset 0x3e0, 32 bytes reserved * for use by hypervisor/software. */ - u8 reserved_sw[32]; + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; @@ -293,12 +298,13 @@ struct vmcb_save_area { struct vmcb_seg ldtr; struct vmcb_seg idtr; struct vmcb_seg tr; - u8 reserved_1[42]; + /* Reserved fields are named following their struct offset */ + u8 reserved_0xa0[42]; u8 vmpl; u8 cpl; - u8 reserved_2[4]; + u8 reserved_0xcc[4]; u64 efer; - u8 reserved_3[112]; + u8 reserved_0xd8[112]; u64 cr4; u64 cr3; u64 cr0; @@ -306,7 +312,7 @@ struct vmcb_save_area { u64 dr6; u64 rflags; u64 rip; - u8 reserved_4[88]; + u8 reserved_0x180[88]; u64 rsp; u64 s_cet; u64 ssp; @@ -321,14 +327,14 @@ struct vmcb_save_area { u64 sysenter_esp; u64 sysenter_eip; u64 cr2; - u8 reserved_5[32]; + u8 reserved_0x248[32]; u64 g_pat; u64 dbgctl; u64 br_from; u64 br_to; u64 last_excp_from; u64 last_excp_to; - u8 reserved_6[72]; + u8 reserved_0x298[72]; u32 spec_ctrl; /* Guest version of SPEC_CTRL at 0x2E0 */ } __packed; @@ -349,12 +355,12 @@ struct sev_es_save_area { u64 vmpl2_ssp; u64 vmpl3_ssp; u64 u_cet; - u8 reserved_1[2]; + u8 reserved_0xc8[2]; u8 vmpl; u8 cpl; - u8 reserved_2[4]; + u8 reserved_0xcc[4]; u64 efer; - u8 reserved_3[104]; + u8 reserved_0xd8[104]; u64 xss; u64 cr4; u64 cr3; @@ -371,7 +377,7 @@ struct sev_es_save_area { u64 dr1_addr_mask; u64 dr2_addr_mask; u64 dr3_addr_mask; - u8 reserved_4[24]; + u8 reserved_0x1c0[24]; u64 rsp; u64 s_cet; u64 ssp; @@ -386,21 +392,21 @@ struct sev_es_save_area { u64 sysenter_esp; u64 sysenter_eip; u64 cr2; - u8 reserved_5[32]; + u8 reserved_0x248[32]; u64 g_pat; u64 dbgctl; u64 br_from; u64 br_to; u64 last_excp_from; u64 last_excp_to; - u8 reserved_7[80]; + u8 reserved_0x298[80]; u32 pkru; - u8 reserved_8[20]; - u64 reserved_9; /* rax already available at 0x01f8 */ + u32 tsc_aux; + u8 reserved_0x2f0[24]; u64 rcx; u64 rdx; u64 rbx; - u64 reserved_10; /* rsp already available at 0x01d8 */ + u64 reserved_0x320; /* rsp already available at 0x01d8 */ u64 rbp; u64 rsi; u64 rdi; @@ -412,7 +418,7 @@ struct sev_es_save_area { u64 r13; u64 r14; u64 r15; - u8 reserved_11[16]; + u8 reserved_0x380[16]; u64 guest_exit_info_1; u64 guest_exit_info_2; u64 guest_exit_int_info; @@ -425,7 +431,7 @@ struct sev_es_save_area { u64 pcpu_id; u64 event_inj; u64 xcr0; - u8 reserved_12[16]; + u8 reserved_0x3f0[16]; /* Floating point area */ u64 x87_dp; @@ -443,23 +449,23 @@ struct sev_es_save_area { } __packed; struct ghcb_save_area { - u8 reserved_1[203]; + u8 reserved_0x0[203]; u8 cpl; - u8 reserved_2[116]; + u8 reserved_0xcc[116]; u64 xss; - u8 reserved_3[24]; + u8 reserved_0x148[24]; u64 dr7; - u8 reserved_4[16]; + u8 reserved_0x168[16]; u64 rip; - u8 reserved_5[88]; + u8 reserved_0x180[88]; u64 rsp; - u8 reserved_6[24]; + u8 reserved_0x1e0[24]; u64 rax; - u8 reserved_7[264]; + u8 reserved_0x200[264]; u64 rcx; u64 rdx; u64 rbx; - u8 reserved_8[8]; + u8 reserved_0x320[8]; u64 rbp; u64 rsi; u64 rdi; @@ -471,12 +477,12 @@ struct ghcb_save_area { u64 r13; u64 r14; u64 r15; - u8 reserved_9[16]; + u8 reserved_0x380[16]; u64 sw_exit_code; u64 sw_exit_info_1; u64 sw_exit_info_2; u64 sw_scratch; - u8 reserved_10[56]; + u8 reserved_0x3b0[56]; u64 xcr0; u8 valid_bitmap[16]; u64 x87_state_gpa; @@ -490,7 +496,7 @@ struct ghcb { u8 shared_buffer[GHCB_SHARED_BUF_SIZE]; - u8 reserved_1[10]; + u8 reserved_0xff0[10]; u16 protocol_version; /* negotiated SEV-ES/GHCB protocol version */ u32 ghcb_usage; } __packed; @@ -502,6 +508,9 @@ struct ghcb { #define EXPECTED_VMCB_CONTROL_AREA_SIZE 1024 #define EXPECTED_GHCB_SIZE PAGE_SIZE +#define BUILD_BUG_RESERVED_OFFSET(x, y) \ + ASSERT_STRUCT_OFFSET(struct x, reserved ## _ ## y, y) + static inline void __unused_size_checks(void) { BUILD_BUG_ON(sizeof(struct vmcb_save_area) != EXPECTED_VMCB_SAVE_AREA_SIZE); @@ -509,6 +518,39 @@ static inline void __unused_size_checks(void) BUILD_BUG_ON(sizeof(struct sev_es_save_area) != EXPECTED_SEV_ES_SAVE_AREA_SIZE); BUILD_BUG_ON(sizeof(struct vmcb_control_area) != EXPECTED_VMCB_CONTROL_AREA_SIZE); BUILD_BUG_ON(sizeof(struct ghcb) != EXPECTED_GHCB_SIZE); + + /* Check offsets of reserved fields */ + + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xa0); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x298); + + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xc8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xd8); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x2f0); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0); + + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0xcc); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x148); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x168); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x180); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x1e0); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x200); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x320); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x380); + BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x3b0); + + BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0); } struct vmcb { diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h index 1cc15528ce29bab81582b4d23c02ba46c9309e49..f4b87f08f5c50d6083977f22d0043c318def3002 100644 --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len); extern void text_poke_sync(void); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_copy(void *addr, const void *opcode, size_t len); +extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok); extern void *text_poke_set(void *addr, int c, size_t len); extern int poke_int3_handler(struct pt_regs *regs); extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 46de10a809ecbd81aa30d28afd78ebcc266e97a5..e48deab8901d4ecb9fddaaf334247e27fb6c1762 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -53,14 +53,6 @@ /* Architectural interrupt line count. */ #define KVM_NR_INTERRUPTS 256 -struct kvm_memory_alias { - __u32 slot; /* this has a different namespace than memory slots */ - __u32 flags; - __u64 guest_phys_addr; - __u64 memory_size; - __u64 target_phys_addr; -}; - /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */ struct kvm_pic_state { __u8 last_irr; /* edge detection */ @@ -214,6 +206,8 @@ struct kvm_msr_list { struct kvm_msr_filter_range { #define KVM_MSR_FILTER_READ (1 << 0) #define KVM_MSR_FILTER_WRITE (1 << 1) +#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \ + KVM_MSR_FILTER_WRITE) __u32 flags; __u32 nmsrs; /* number of msrs in bitmap */ __u32 base; /* MSR index the bitmap starts at */ @@ -222,8 +216,11 @@ struct kvm_msr_filter_range { #define KVM_MSR_FILTER_MAX_RANGES 16 struct kvm_msr_filter { +#ifndef __KERNEL__ #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0) +#endif #define KVM_MSR_FILTER_DEFAULT_DENY (1 << 0) +#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY) __u32 flags; struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES]; }; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index cceaafdd2d84a5725db58e260df05bed2f52ff0f..96d51bbc2bd420b95c31fd24f50a242a2c7be205 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -143,6 +143,8 @@ obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o obj-$(CONFIG_CFI_CLANG) += cfi.o +obj-$(CONFIG_CALL_THUNKS) += callthunks.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index a9bea860e22ab0cf10a37eea51275949b4a03d99..23cbfa8d34c581923559491e48c741de530c6a5f 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -116,6 +116,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len) extern s32 __retpoline_sites[], __retpoline_sites_end[]; extern s32 __return_sites[], __return_sites_end[]; +extern s32 __cfi_sites[], __cfi_sites_end[]; extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[]; extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; extern s32 __smp_locks[], __smp_locks_end[]; @@ -377,6 +378,56 @@ static int emit_indirect(int op, int reg, u8 *bytes) return i; } +static inline bool is_jcc32(struct insn *insn) +{ + /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ + return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80; +} + +static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes) +{ + u8 op = insn->opcode.bytes[0]; + int i = 0; + + /* + * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional + * tail-calls. Deal with them. + */ + if (is_jcc32(insn)) { + bytes[i++] = op; + op = insn->opcode.bytes[1]; + goto clang_jcc; + } + + if (insn->length == 6) + bytes[i++] = 0x2e; /* CS-prefix */ + + switch (op) { + case CALL_INSN_OPCODE: + __text_gen_insn(bytes+i, op, addr+i, + __x86_indirect_call_thunk_array[reg], + CALL_INSN_SIZE); + i += CALL_INSN_SIZE; + break; + + case JMP32_INSN_OPCODE: +clang_jcc: + __text_gen_insn(bytes+i, op, addr+i, + __x86_indirect_jump_thunk_array[reg], + JMP32_INSN_SIZE); + i += JMP32_INSN_SIZE; + break; + + default: + WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr); + return -1; + } + + WARN_ON_ONCE(i != insn->length); + + return i; +} + /* * Rewrite the compiler generated retpoline thunk calls. * @@ -409,8 +460,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) BUG_ON(reg == 4); if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) && - !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) + !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { + if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) + return emit_call_track_retpoline(addr, insn, reg, bytes); + return -1; + } op = insn->opcode.bytes[0]; @@ -427,8 +482,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes) * [ NOP ] * 1: */ - /* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */ - if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) { + if (is_jcc32(insn)) { cc = insn->opcode.bytes[1] & 0xf; cc ^= 1; /* invert condition */ @@ -518,6 +572,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) } #ifdef CONFIG_RETHUNK + +#ifdef CONFIG_CALL_THUNKS +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; +#endif + /* * Rewrite the compiler generated return thunk tail-calls. * @@ -533,14 +592,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes) { int i = 0; - if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - return -1; + if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { + if (x86_return_thunk == __x86_return_thunk) + return -1; - bytes[i++] = RET_INSN_OPCODE; + i = JMP32_INSN_SIZE; + __text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i); + } else { + bytes[i++] = RET_INSN_OPCODE; + } for (; i < insn->length;) bytes[i++] = INT3_INSN_OPCODE; - return i; } @@ -594,6 +657,28 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { } #ifdef CONFIG_X86_KERNEL_IBT +static void poison_endbr(void *addr, bool warn) +{ + u32 endbr, poison = gen_endbr_poison(); + + if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + return; + + if (!is_endbr(endbr)) { + WARN_ON_ONCE(warn); + return; + } + + DPRINTK("ENDBR at: %pS (%px)", addr, addr); + + /* + * When we have IBT, the lack of ENDBR will trigger #CP + */ + DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); + DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); + text_poke_early(addr, &poison, 4); +} + /* * Generated by: objtool --ibt */ @@ -602,31 +687,391 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) s32 *s; for (s = start; s < end; s++) { - u32 endbr, poison = gen_endbr_poison(); void *addr = (void *)s + *s; - if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr))) + poison_endbr(addr, true); + if (IS_ENABLED(CONFIG_FINEIBT)) + poison_endbr(addr - 16, false); + } +} + +#else + +void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { } + +#endif /* CONFIG_X86_KERNEL_IBT */ + +#ifdef CONFIG_FINEIBT + +enum cfi_mode { + CFI_DEFAULT, + CFI_OFF, + CFI_KCFI, + CFI_FINEIBT, +}; + +static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT; +static bool cfi_rand __ro_after_init = true; +static u32 cfi_seed __ro_after_init; + +/* + * Re-hash the CFI hash with a boot-time seed while making sure the result is + * not a valid ENDBR instruction. + */ +static u32 cfi_rehash(u32 hash) +{ + hash ^= cfi_seed; + while (unlikely(is_endbr(hash) || is_endbr(-hash))) { + bool lsb = hash & 1; + hash >>= 1; + if (lsb) + hash ^= 0x80200003; + } + return hash; +} + +static __init int cfi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + while (str) { + char *next = strchr(str, ','); + if (next) { + *next = 0; + next++; + } + + if (!strcmp(str, "auto")) { + cfi_mode = CFI_DEFAULT; + } else if (!strcmp(str, "off")) { + cfi_mode = CFI_OFF; + cfi_rand = false; + } else if (!strcmp(str, "kcfi")) { + cfi_mode = CFI_KCFI; + } else if (!strcmp(str, "fineibt")) { + cfi_mode = CFI_FINEIBT; + } else if (!strcmp(str, "norand")) { + cfi_rand = false; + } else { + pr_err("Ignoring unknown cfi option (%s).", str); + } + + str = next; + } + + return 0; +} +early_param("cfi", cfi_parse_cmdline); + +/* + * kCFI FineIBT + * + * __cfi_\func: __cfi_\func: + * movl $0x12345678,%eax // 5 endbr64 // 4 + * nop subl $0x12345678,%r10d // 7 + * nop jz 1f // 2 + * nop ud2 // 2 + * nop 1: nop // 1 + * nop + * nop + * nop + * nop + * nop + * nop + * nop + * + * + * caller: caller: + * movl $(-0x12345678),%r10d // 6 movl $0x12345678,%r10d // 6 + * addl $-15(%r11),%r10d // 4 sub $16,%r11 // 4 + * je 1f // 2 nop4 // 4 + * ud2 // 2 + * 1: call __x86_indirect_thunk_r11 // 5 call *%r11; nop2; // 5 + * + */ + +asm( ".pushsection .rodata \n" + "fineibt_preamble_start: \n" + " endbr64 \n" + " subl $0x12345678, %r10d \n" + " je fineibt_preamble_end \n" + " ud2 \n" + " nop \n" + "fineibt_preamble_end: \n" + ".popsection\n" +); + +extern u8 fineibt_preamble_start[]; +extern u8 fineibt_preamble_end[]; + +#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start) +#define fineibt_preamble_hash 7 + +asm( ".pushsection .rodata \n" + "fineibt_caller_start: \n" + " movl $0x12345678, %r10d \n" + " sub $16, %r11 \n" + ASM_NOP4 + "fineibt_caller_end: \n" + ".popsection \n" +); + +extern u8 fineibt_caller_start[]; +extern u8 fineibt_caller_end[]; + +#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start) +#define fineibt_caller_hash 2 + +#define fineibt_caller_jmp (fineibt_caller_size - 2) + +static u32 decode_preamble_hash(void *addr) +{ + u8 *p = addr; + + /* b8 78 56 34 12 mov $0x12345678,%eax */ + if (p[0] == 0xb8) + return *(u32 *)(addr + 1); + + return 0; /* invalid hash value */ +} + +static u32 decode_caller_hash(void *addr) +{ + u8 *p = addr; + + /* 41 ba 78 56 34 12 mov $0x12345678,%r10d */ + if (p[0] == 0x41 && p[1] == 0xba) + return -*(u32 *)(addr + 2); + + /* e8 0c 78 56 34 12 jmp.d8 +12 */ + if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp) + return -*(u32 *)(addr + 2); + + return 0; /* invalid hash value */ +} + +/* .retpoline_sites */ +static int cfi_disable_callers(s32 *start, s32 *end) +{ + /* + * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate + * in tact for later usage. Also see decode_caller_hash() and + * cfi_rewrite_callers(). + */ + const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp }; + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (!hash) /* nocfi callers */ continue; - if (WARN_ON_ONCE(!is_endbr(endbr))) + text_poke_early(addr, jmp, 2); + } + + return 0; +} + +static int cfi_enable_callers(s32 *start, s32 *end) +{ + /* + * Re-enable kCFI, undo what cfi_disable_callers() did. + */ + const u8 mov[] = { 0x41, 0xba }; + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (!hash) /* nocfi callers */ continue; - DPRINTK("ENDBR at: %pS (%px)", addr, addr); + text_poke_early(addr, mov, 2); + } - /* - * When we have IBT, the lack of ENDBR will trigger #CP - */ - DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr); - DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr); - text_poke_early(addr, &poison, 4); + return 0; +} + +/* .cfi_sites */ +static int cfi_rand_preamble(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + hash = decode_preamble_hash(addr); + if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", + addr, addr, 5, addr)) + return -EINVAL; + + hash = cfi_rehash(hash); + text_poke_early(addr + 1, &hash, 4); + } + + return 0; +} + +static int cfi_rewrite_preamble(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + hash = decode_preamble_hash(addr); + if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n", + addr, addr, 5, addr)) + return -EINVAL; + + text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size); + WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678); + text_poke_early(addr + fineibt_preamble_hash, &hash, 4); + } + + return 0; +} + +/* .retpoline_sites */ +static int cfi_rand_callers(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (hash) { + hash = -cfi_rehash(hash); + text_poke_early(addr + 2, &hash, 4); + } + } + + return 0; +} + +static int cfi_rewrite_callers(s32 *start, s32 *end) +{ + s32 *s; + + for (s = start; s < end; s++) { + void *addr = (void *)s + *s; + u32 hash; + + addr -= fineibt_caller_size; + hash = decode_caller_hash(addr); + if (hash) { + text_poke_early(addr, fineibt_caller_start, fineibt_caller_size); + WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678); + text_poke_early(addr + fineibt_caller_hash, &hash, 4); + } + /* rely on apply_retpolines() */ + } + + return 0; +} + +static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi, bool builtin) +{ + int ret; + + if (WARN_ONCE(fineibt_preamble_size != 16, + "FineIBT preamble wrong size: %ld", fineibt_preamble_size)) + return; + + if (cfi_mode == CFI_DEFAULT) { + cfi_mode = CFI_KCFI; + if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT)) + cfi_mode = CFI_FINEIBT; + } + + /* + * Rewrite the callers to not use the __cfi_ stubs, such that we might + * rewrite them. This disables all CFI. If this succeeds but any of the + * later stages fails, we're without CFI. + */ + ret = cfi_disable_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + + if (cfi_rand) { + if (builtin) + cfi_seed = get_random_u32(); + + ret = cfi_rand_preamble(start_cfi, end_cfi); + if (ret) + goto err; + + ret = cfi_rand_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + } + + switch (cfi_mode) { + case CFI_OFF: + if (builtin) + pr_info("Disabling CFI\n"); + return; + + case CFI_KCFI: + ret = cfi_enable_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + + if (builtin) + pr_info("Using kCFI\n"); + return; + + case CFI_FINEIBT: + ret = cfi_rewrite_preamble(start_cfi, end_cfi); + if (ret) + goto err; + + ret = cfi_rewrite_callers(start_retpoline, end_retpoline); + if (ret) + goto err; + + if (builtin) + pr_info("Using FineIBT CFI\n"); + return; + + default: + break; } + +err: + pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n"); } #else -void __init_or_module apply_ibt_endbr(s32 *start, s32 *end) { } +static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi, bool builtin) +{ +} -#endif /* CONFIG_X86_KERNEL_IBT */ +#endif + +void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline, + s32 *start_cfi, s32 *end_cfi) +{ + return __apply_fineibt(start_retpoline, end_retpoline, + start_cfi, end_cfi, + /* .builtin = */ false); +} #ifdef CONFIG_SMP static void alternatives_smp_lock(const s32 *start, const s32 *end, @@ -934,6 +1379,9 @@ void __init alternative_instructions(void) */ apply_paravirt(__parainstructions, __parainstructions_end); + __apply_fineibt(__retpoline_sites, __retpoline_sites_end, + __cfi_sites, __cfi_sites_end, true); + /* * Rewrite the retpolines, must be done before alternatives since * those can rewrite the retpoline thunks. @@ -947,6 +1395,12 @@ void __init alternative_instructions(void) */ apply_alternatives(__alt_instructions, __alt_instructions_end); + /* + * Now all calls are established. Apply the call thunks if + * required. + */ + callthunks_patch_builtin_calls(); + apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end); #ifdef CONFIG_SMP @@ -1236,27 +1690,15 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len) return __text_poke(text_poke_memcpy, addr, opcode, len); } -/** - * text_poke_copy - Copy instructions into (an unused part of) RX memory - * @addr: address to modify - * @opcode: source of the copy - * @len: length to copy, could be more than 2x PAGE_SIZE - * - * Not safe against concurrent execution; useful for JITs to dump - * new code blocks into unused regions of RX memory. Can be used in - * conjunction with synchronize_rcu_tasks() to wait for existing - * execution to quiesce after having made sure no existing functions - * pointers are live. - */ -void *text_poke_copy(void *addr, const void *opcode, size_t len) +void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, + bool core_ok) { unsigned long start = (unsigned long)addr; size_t patched = 0; - if (WARN_ON_ONCE(core_kernel_text(start))) + if (WARN_ON_ONCE(!core_ok && core_kernel_text(start))) return NULL; - mutex_lock(&text_mutex); while (patched < len) { unsigned long ptr = start + patched; size_t s; @@ -1266,6 +1708,25 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len) __text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s); patched += s; } + return addr; +} + +/** + * text_poke_copy - Copy instructions into (an unused part of) RX memory + * @addr: address to modify + * @opcode: source of the copy + * @len: length to copy, could be more than 2x PAGE_SIZE + * + * Not safe against concurrent execution; useful for JITs to dump + * new code blocks into unused regions of RX memory. Can be used in + * conjunction with synchronize_rcu_tasks() to wait for existing + * execution to quiesce after having made sure no existing functions + * pointers are live. + */ +void *text_poke_copy(void *addr, const void *opcode, size_t len) +{ + mutex_lock(&text_mutex); + addr = text_poke_copy_locked(addr, opcode, len, false); mutex_unlock(&text_mutex); return addr; } diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index 437308004ef2e4f1345947ce318c62bc56a6036f..82c783da16a863d9b0318174ef96b076866700c1 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -107,4 +107,9 @@ static void __used common(void) OFFSET(TSS_sp0, tss_struct, x86_tss.sp0); OFFSET(TSS_sp1, tss_struct, x86_tss.sp1); OFFSET(TSS_sp2, tss_struct, x86_tss.sp2); + OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack); +#ifdef CONFIG_CALL_DEPTH_TRACKING + OFFSET(X86_call_depth, pcpu_hot, call_depth); +#endif + } diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 9b698215d2618e6b01e3112bc5ea1047447dc8a9..bb65371ea9df521e37cf6d0c27688a9fbf47af19 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -57,7 +57,7 @@ int main(void) BLANK(); #ifdef CONFIG_STACKPROTECTOR - DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary)); + OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary); BLANK(); #endif return 0; diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c new file mode 100644 index 0000000000000000000000000000000000000000..7d2c75ec9a8cd9d145085f4b9007a8ce4136ede9 --- /dev/null +++ b/arch/x86/kernel/callthunks.c @@ -0,0 +1,388 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#define pr_fmt(fmt) "callthunks: " fmt + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int __initdata_or_module debug_callthunks; + +#define prdbg(fmt, args...) \ +do { \ + if (debug_callthunks) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while(0) + +static int __init debug_thunks(char *str) +{ + debug_callthunks = 1; + return 1; +} +__setup("debug-callthunks", debug_thunks); + +#ifdef CONFIG_CALL_THUNKS_DEBUG +DEFINE_PER_CPU(u64, __x86_call_count); +DEFINE_PER_CPU(u64, __x86_ret_count); +DEFINE_PER_CPU(u64, __x86_stuffs_count); +DEFINE_PER_CPU(u64, __x86_ctxsw_count); +EXPORT_SYMBOL_GPL(__x86_ctxsw_count); +EXPORT_SYMBOL_GPL(__x86_call_count); +#endif + +extern s32 __call_sites[], __call_sites_end[]; + +struct thunk_desc { + void *template; + unsigned int template_size; +}; + +struct core_text { + unsigned long base; + unsigned long end; + const char *name; +}; + +static bool thunks_initialized __ro_after_init; + +static const struct core_text builtin_coretext = { + .base = (unsigned long)_text, + .end = (unsigned long)_etext, + .name = "builtin", +}; + +asm ( + ".pushsection .rodata \n" + ".global skl_call_thunk_template \n" + "skl_call_thunk_template: \n" + __stringify(INCREMENT_CALL_DEPTH)" \n" + ".global skl_call_thunk_tail \n" + "skl_call_thunk_tail: \n" + ".popsection \n" +); + +extern u8 skl_call_thunk_template[]; +extern u8 skl_call_thunk_tail[]; + +#define SKL_TMPL_SIZE \ + ((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template)) + +extern void error_entry(void); +extern void xen_error_entry(void); +extern void paranoid_entry(void); + +static inline bool within_coretext(const struct core_text *ct, void *addr) +{ + unsigned long p = (unsigned long)addr; + + return ct->base <= p && p < ct->end; +} + +static inline bool within_module_coretext(void *addr) +{ + bool ret = false; + +#ifdef CONFIG_MODULES + struct module *mod; + + preempt_disable(); + mod = __module_address((unsigned long)addr); + if (mod && within_module_core((unsigned long)addr, mod)) + ret = true; + preempt_enable(); +#endif + return ret; +} + +static bool is_coretext(const struct core_text *ct, void *addr) +{ + if (ct && within_coretext(ct, addr)) + return true; + if (within_coretext(&builtin_coretext, addr)) + return true; + return within_module_coretext(addr); +} + +static __init_or_module bool skip_addr(void *dest) +{ + if (dest == error_entry) + return true; + if (dest == paranoid_entry) + return true; + if (dest == xen_error_entry) + return true; + /* Does FILL_RSB... */ + if (dest == __switch_to_asm) + return true; + /* Accounts directly */ + if (dest == ret_from_fork) + return true; +#ifdef CONFIG_HOTPLUG_CPU + if (dest == start_cpu0) + return true; +#endif +#ifdef CONFIG_FUNCTION_TRACER + if (dest == __fentry__) + return true; +#endif +#ifdef CONFIG_KEXEC_CORE + if (dest >= (void *)relocate_kernel && + dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE) + return true; +#endif +#ifdef CONFIG_XEN + if (dest >= (void *)hypercall_page && + dest < (void*)hypercall_page + PAGE_SIZE) + return true; +#endif + return false; +} + +static __init_or_module void *call_get_dest(void *addr) +{ + struct insn insn; + void *dest; + int ret; + + ret = insn_decode_kernel(&insn, addr); + if (ret) + return ERR_PTR(ret); + + /* Patched out call? */ + if (insn.opcode.bytes[0] != CALL_INSN_OPCODE) + return NULL; + + dest = addr + insn.length + insn.immediate.value; + if (skip_addr(dest)) + return NULL; + return dest; +} + +static const u8 nops[] = { + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, + 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, +}; + +static __init_or_module void *patch_dest(void *dest, bool direct) +{ + unsigned int tsize = SKL_TMPL_SIZE; + u8 *pad = dest - tsize; + + /* Already patched? */ + if (!bcmp(pad, skl_call_thunk_template, tsize)) + return pad; + + /* Ensure there are nops */ + if (bcmp(pad, nops, tsize)) { + pr_warn_once("Invalid padding area for %pS\n", dest); + return NULL; + } + + if (direct) + memcpy(pad, skl_call_thunk_template, tsize); + else + text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true); + return pad; +} + +static __init_or_module void patch_call(void *addr, const struct core_text *ct) +{ + void *pad, *dest; + u8 bytes[8]; + + if (!within_coretext(ct, addr)) + return; + + dest = call_get_dest(addr); + if (!dest || WARN_ON_ONCE(IS_ERR(dest))) + return; + + if (!is_coretext(ct, dest)) + return; + + pad = patch_dest(dest, within_coretext(ct, dest)); + if (!pad) + return; + + prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr, + dest, dest, pad); + __text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE); + text_poke_early(addr, bytes, CALL_INSN_SIZE); +} + +static __init_or_module void +patch_call_sites(s32 *start, s32 *end, const struct core_text *ct) +{ + s32 *s; + + for (s = start; s < end; s++) + patch_call((void *)s + *s, ct); +} + +static __init_or_module void +patch_paravirt_call_sites(struct paravirt_patch_site *start, + struct paravirt_patch_site *end, + const struct core_text *ct) +{ + struct paravirt_patch_site *p; + + for (p = start; p < end; p++) + patch_call(p->instr, ct); +} + +static __init_or_module void +callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct) +{ + prdbg("Patching call sites %s\n", ct->name); + patch_call_sites(cs->call_start, cs->call_end, ct); + patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct); + prdbg("Patching call sites done%s\n", ct->name); +} + +void __init callthunks_patch_builtin_calls(void) +{ + struct callthunk_sites cs = { + .call_start = __call_sites, + .call_end = __call_sites_end, + .pv_start = __parainstructions, + .pv_end = __parainstructions_end + }; + + if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) + return; + + pr_info("Setting up call depth tracking\n"); + mutex_lock(&text_mutex); + callthunks_setup(&cs, &builtin_coretext); + static_call_force_reinit(); + thunks_initialized = true; + mutex_unlock(&text_mutex); +} + +void *callthunks_translate_call_dest(void *dest) +{ + void *target; + + lockdep_assert_held(&text_mutex); + + if (!thunks_initialized || skip_addr(dest)) + return dest; + + if (!is_coretext(NULL, dest)) + return dest; + + target = patch_dest(dest, false); + return target ? : dest; +} + +bool is_callthunk(void *addr) +{ + unsigned int tmpl_size = SKL_TMPL_SIZE; + void *tmpl = skl_call_thunk_template; + unsigned long dest; + + dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT); + if (!thunks_initialized || skip_addr((void *)dest)) + return false; + + return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size); +} + +#ifdef CONFIG_BPF_JIT +int x86_call_depth_emit_accounting(u8 **pprog, void *func) +{ + unsigned int tmpl_size = SKL_TMPL_SIZE; + void *tmpl = skl_call_thunk_template; + + if (!thunks_initialized) + return 0; + + /* Is function call target a thunk? */ + if (func && is_callthunk(func)) + return 0; + + memcpy(*pprog, tmpl, tmpl_size); + *pprog += tmpl_size; + return tmpl_size; +} +#endif + +#ifdef CONFIG_MODULES +void noinline callthunks_patch_module_calls(struct callthunk_sites *cs, + struct module *mod) +{ + struct core_text ct = { + .base = (unsigned long)mod->core_layout.base, + .end = (unsigned long)mod->core_layout.base + mod->core_layout.size, + .name = mod->name, + }; + + if (!thunks_initialized) + return; + + mutex_lock(&text_mutex); + callthunks_setup(cs, &ct); + mutex_unlock(&text_mutex); +} +#endif /* CONFIG_MODULES */ + +#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS) +static int callthunks_debug_show(struct seq_file *m, void *p) +{ + unsigned long cpu = (unsigned long)m->private; + + seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n,", + per_cpu(__x86_call_count, cpu), + per_cpu(__x86_ret_count, cpu), + per_cpu(__x86_stuffs_count, cpu), + per_cpu(__x86_ctxsw_count, cpu)); + return 0; +} + +static int callthunks_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, callthunks_debug_show, inode->i_private); +} + +static const struct file_operations dfs_ops = { + .open = callthunks_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int __init callthunks_debugfs_init(void) +{ + struct dentry *dir; + unsigned long cpu; + + dir = debugfs_create_dir("callthunks", NULL); + for_each_possible_cpu(cpu) { + void *arg = (void *)cpu; + char name [10]; + + sprintf(name, "cpu%lu", cpu); + debugfs_create_file(name, 0644, dir, arg, &dfs_ops); + } + return 0; +} +__initcall(callthunks_debugfs_init); +#endif diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index f10a921ee75658980b92d0d81238d9c16fb642ce..d7e3ceaf75c16637cf10db37afcae0b28ecf49b3 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -17,9 +17,6 @@ KMSAN_SANITIZE_common.o := n # As above, instrumenting secondary CPU boot code causes boot hangs. KCSAN_SANITIZE_common.o := n -# Make sure load_percpu_segment has no stackprotector -CFLAGS_common.o := -fno-stack-protector - obj-y := cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a8a5033a804d6d8a0deb3f040854ad79b964a6ef..d970ddb0cc65bedf358c32321d202036cf792923 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -787,6 +787,7 @@ enum retbleed_mitigation { RETBLEED_MITIGATION_IBPB, RETBLEED_MITIGATION_IBRS, RETBLEED_MITIGATION_EIBRS, + RETBLEED_MITIGATION_STUFF, }; enum retbleed_mitigation_cmd { @@ -794,6 +795,7 @@ enum retbleed_mitigation_cmd { RETBLEED_CMD_AUTO, RETBLEED_CMD_UNRET, RETBLEED_CMD_IBPB, + RETBLEED_CMD_STUFF, }; static const char * const retbleed_strings[] = { @@ -802,6 +804,7 @@ static const char * const retbleed_strings[] = { [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB", [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS", [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS", + [RETBLEED_MITIGATION_STUFF] = "Mitigation: Stuffing", }; static enum retbleed_mitigation retbleed_mitigation __ro_after_init = @@ -831,8 +834,12 @@ static int __init retbleed_parse_cmdline(char *str) retbleed_cmd = RETBLEED_CMD_UNRET; } else if (!strcmp(str, "ibpb")) { retbleed_cmd = RETBLEED_CMD_IBPB; + } else if (!strcmp(str, "stuff")) { + retbleed_cmd = RETBLEED_CMD_STUFF; } else if (!strcmp(str, "nosmt")) { retbleed_nosmt = true; + } else if (!strcmp(str, "force")) { + setup_force_cpu_bug(X86_BUG_RETBLEED); } else { pr_err("Ignoring unknown retbleed option (%s).", str); } @@ -879,6 +886,21 @@ static void __init retbleed_select_mitigation(void) } break; + case RETBLEED_CMD_STUFF: + if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) && + spectre_v2_enabled == SPECTRE_V2_RETPOLINE) { + retbleed_mitigation = RETBLEED_MITIGATION_STUFF; + + } else { + if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING)) + pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n"); + else + pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n"); + + goto do_cmd_auto; + } + break; + do_cmd_auto: case RETBLEED_CMD_AUTO: default: @@ -916,6 +938,12 @@ do_cmd_auto: mitigate_smt = true; break; + case RETBLEED_MITIGATION_STUFF: + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH); + x86_set_skl_return_thunk(); + break; + default: break; } @@ -926,7 +954,7 @@ do_cmd_auto: /* * Let IBRS trump all on Intel without affecting the effects of the - * retbleed= cmdline option. + * retbleed= cmdline option except for call depth based stuffing */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { switch (spectre_v2_enabled) { @@ -939,7 +967,8 @@ do_cmd_auto: retbleed_mitigation = RETBLEED_MITIGATION_EIBRS; break; default: - pr_err(RETBLEED_INTEL_MSG); + if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF) + pr_err(RETBLEED_INTEL_MSG); } } @@ -1413,6 +1442,7 @@ static void __init spectre_v2_select_mitigation(void) if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) && boot_cpu_has_bug(X86_BUG_RETBLEED) && retbleed_cmd != RETBLEED_CMD_OFF && + retbleed_cmd != RETBLEED_CMD_STUFF && boot_cpu_has(X86_FEATURE_IBRS) && boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) { mode = SPECTRE_V2_IBRS; diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 62b83bc5b4b9233f9ebf51fbfa5ca91bdcb5a51f..9cfca3d7d0e207c518b7cadf192a0f1337f790d6 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -610,6 +610,7 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c) if (!ibt_selftest()) { pr_err("IBT selftest: Failed!\n"); + wrmsrl(MSR_IA32_S_CET, 0); setup_clear_cpu_cap(X86_FEATURE_IBT); return; } @@ -702,16 +703,6 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c) __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long)); -void load_percpu_segment(int cpu) -{ -#ifdef CONFIG_X86_32 - loadsegment(fs, __KERNEL_PERCPU); -#else - __loadsegment_simple(gs, 0); - wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); -#endif -} - #ifdef CONFIG_X86_32 /* The 32-bit entry code needs to find cpu_entry_area. */ DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area); @@ -739,16 +730,45 @@ void load_fixmap_gdt(int cpu) } EXPORT_SYMBOL_GPL(load_fixmap_gdt); -/* - * Current gdt points %fs at the "master" per-cpu area: after this, - * it's on the real one. +/** + * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base + * @cpu: The CPU number for which this is invoked + * + * Invoked during early boot to switch from early GDT and early per CPU to + * the direct GDT and the runtime per CPU area. On 32-bit the percpu base + * switch is implicit by loading the direct GDT. On 64bit this requires + * to update GSBASE. */ -void switch_to_new_gdt(int cpu) +void __init switch_gdt_and_percpu_base(int cpu) { - /* Load the original GDT */ load_direct_gdt(cpu); - /* Reload the per-cpu base */ - load_percpu_segment(cpu); + +#ifdef CONFIG_X86_64 + /* + * No need to load %gs. It is already correct. + * + * Writing %gs on 64bit would zero GSBASE which would make any per + * CPU operation up to the point of the wrmsrl() fault. + * + * Set GSBASE to the new offset. Until the wrmsrl() happens the + * early mapping is still valid. That means the GSBASE update will + * lose any prior per CPU data which was not copied over in + * setup_per_cpu_areas(). + * + * This works even with stackprotector enabled because the + * per CPU stack canary is 0 in both per CPU areas. + */ + wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu)); +#else + /* + * %fs is already set to __KERNEL_PERCPU, but after switching GDT + * it is required to load FS again so that the 'hidden' part is + * updated from the new GDT. Up to this point the early per CPU + * translation is active. Any content of the early per CPU data + * which was not copied over in setup_per_cpu_areas() is lost. + */ + loadsegment(fs, __KERNEL_PERCPU); +#endif } static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; @@ -1993,27 +2013,18 @@ static __init int setup_clearcpuid(char *arg) } __setup("clearcpuid=", setup_clearcpuid); +DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = { + .current_task = &init_task, + .preempt_count = INIT_PREEMPT_COUNT, + .top_of_stack = TOP_OF_INIT_STACK, +}; +EXPORT_PER_CPU_SYMBOL(pcpu_hot); + #ifdef CONFIG_X86_64 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __aligned(PAGE_SIZE) __visible; EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data); -/* - * The following percpu variables are hot. Align current_task to - * cacheline size such that they fall in the same cacheline. - */ -DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = - &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); - -DEFINE_PER_CPU(void *, hardirq_stack_ptr); -DEFINE_PER_CPU(bool, hardirq_stack_inuse); - -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK; - static void wrmsrl_cstar(unsigned long val) { /* @@ -2064,20 +2075,6 @@ void syscall_init(void) #else /* CONFIG_X86_64 */ -DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; -EXPORT_PER_CPU_SYMBOL(current_task); -DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT; -EXPORT_PER_CPU_SYMBOL(__preempt_count); - -/* - * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find - * the top of the kernel stack. Use an extra percpu variable to track the - * top of the kernel stack directly. - */ -DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = - (unsigned long)&init_thread_union + THREAD_SIZE; -EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack); - #ifdef CONFIG_STACKPROTECTOR DEFINE_PER_CPU(unsigned long, __stack_chk_guard); EXPORT_PER_CPU_SYMBOL(__stack_chk_guard); @@ -2248,12 +2245,6 @@ void cpu_init(void) boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE)) cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); - /* - * Initialize the per-CPU GDT with the boot GDT, - * and set up the GDT descriptor: - */ - switch_to_new_gdt(cpu); - if (IS_ENABLED(CONFIG_X86_64)) { loadsegment(fs, 0); memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 722fd712e1cf029adff9a73c73aad44992dbcef6..b4905d5173fd36d1df190a54725eced03f9170dd 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type) static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* @@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info) static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr); + unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr); unsigned long *end = begin + (THREAD_SIZE / sizeof(long)); /* diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 6c5defd6569a3eb867b58c37fccb93dd33decaee..f05339fee7785b1da2cc2cec9713e06a1242dbe7 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info) { - unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr); + unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr); unsigned long *begin; /* diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index bd165004776d93bf998c708cac2ec2567f9739a6..cf15ef5aecff26a6fbdc231a325155ddfbd1bc3d 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void) static const char *ftrace_call_replace(unsigned long ip, unsigned long addr) { + /* + * No need to translate into a callthunk. The trampoline does + * the depth accounting itself. + */ return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr); } @@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) unsigned long size; unsigned long *ptr; void *trampoline; - void *ip; + void *ip, *dest; /* 48 8b 15 is movq (%rip), %rdx */ unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 }; unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE }; @@ -359,7 +363,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) ip = trampoline + size; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE); + __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE); else memcpy(ip, retq, sizeof(retq)); @@ -404,17 +408,19 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) /* put in the call to the function */ mutex_lock(&text_mutex); call_offset -= start_offset; + /* + * No need to translate into a callthunk. The trampoline does + * the depth accounting before the call already. + */ + dest = ftrace_ops_get_func(ops); memcpy(trampoline + call_offset, - text_gen_insn(CALL_INSN_OPCODE, - trampoline + call_offset, - ftrace_ops_get_func(ops)), CALL_INSN_SIZE); + text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest), + CALL_INSN_SIZE); mutex_unlock(&text_mutex); /* ALLOC_TRAMP flags lets us know we created it */ ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP; - set_vm_flush_reset_perms(trampoline); - if (likely(system_state != SYSTEM_BOOTING)) set_memory_ro((unsigned long)trampoline, npages); set_memory_x((unsigned long)trampoline, npages); diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 2a4be92fd1444d02f084d65c21ee4faa54076009..1265ad519249c027cae82d8941d394b8dbc50795 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -3,8 +3,9 @@ * Copyright (C) 2014 Steven Rostedt, Red Hat Inc */ -#include #include +#include +#include #include #include #include @@ -131,16 +132,19 @@ .endm SYM_TYPED_FUNC_START(ftrace_stub) + CALL_DEPTH_ACCOUNT RET SYM_FUNC_END(ftrace_stub) SYM_TYPED_FUNC_START(ftrace_stub_graph) + CALL_DEPTH_ACCOUNT RET SYM_FUNC_END(ftrace_stub_graph) #ifdef CONFIG_DYNAMIC_FTRACE SYM_FUNC_START(__fentry__) + CALL_DEPTH_ACCOUNT RET SYM_FUNC_END(__fentry__) EXPORT_SYMBOL(__fentry__) @@ -149,6 +153,8 @@ SYM_FUNC_START(ftrace_caller) /* save_mcount_regs fills in first two parameters */ save_mcount_regs + CALL_DEPTH_ACCOUNT + /* Stack - skipping return address of ftrace_caller */ leaq MCOUNT_REG_SIZE+8(%rsp), %rcx movq %rcx, RSP(%rsp) @@ -164,6 +170,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL) /* Only ops with REGS flag set should have CS register set */ movq $0, CS(%rsp) + /* Account for the function call below */ + CALL_DEPTH_ACCOUNT + SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) ANNOTATE_NOENDBR call ftrace_stub @@ -193,6 +202,8 @@ SYM_FUNC_START(ftrace_regs_caller) save_mcount_regs 8 /* save_mcount_regs fills in first two parameters */ + CALL_DEPTH_ACCOUNT + SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) ANNOTATE_NOENDBR /* Load the ftrace_ops into the 3rd parameter */ @@ -223,6 +234,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL) /* regs go into 4th parameter */ leaq (%rsp), %rcx + /* Account for the function call below */ + CALL_DEPTH_ACCOUNT + SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) ANNOTATE_NOENDBR call ftrace_stub @@ -275,7 +289,20 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) /* Restore flags */ popfq UNWIND_HINT_FUNC - RET + + /* + * The above left an extra return value on the stack; effectively + * doing a tail-call without using a register. This PUSH;RET + * pattern unbalances the RSB, inject a pointless CALL to rebalance. + */ + ANNOTATE_INTRA_FUNCTION_CALL + CALL .Ldo_rebalance + int3 +.Ldo_rebalance: + add $8, %rsp + ALTERNATIVE __stringify(RET), \ + __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \ + X86_FEATURE_CALL_DEPTH SYM_FUNC_END(ftrace_regs_caller) STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) @@ -284,6 +311,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller) #else /* ! CONFIG_DYNAMIC_FTRACE */ SYM_FUNC_START(__fentry__) + CALL_DEPTH_ACCOUNT + cmpq $ftrace_stub, ftrace_trace_function jnz trace RET @@ -337,6 +366,8 @@ SYM_CODE_START(return_to_handler) int3 .Ldo_rop: mov %rdi, (%rsp) - RET + ALTERNATIVE __stringify(RET), \ + __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \ + X86_FEATURE_CALL_DEPTH SYM_CODE_END(return_to_handler) #endif diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index d860d437631b649dd37476ff96195fadfbf0b2e5..222efd4a09bc8861b3871f4a175013c0730f8759 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -370,6 +370,7 @@ SYM_CODE_END(secondary_startup_64) * start_secondary() via .Ljump_to_C_code. */ SYM_CODE_START(start_cpu0) + ANNOTATE_NOENDBR UNWIND_HINT_EMPTY movq initial_stack(%rip), %rsp jmp .Ljump_to_C_code diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 01833ebf5e8e3582992a8860460bbe555923eb30..dc1049c01f9b9aa19c6f986e975d382502bce062 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c @@ -52,9 +52,6 @@ static inline int check_stack_overflow(void) { return 0; } static inline void print_stack_overflow(void) { } #endif -DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr); -DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr); - static void call_on_stack(void *func, void *stack) { asm volatile("xchgl %%ebx,%%esp \n" @@ -77,7 +74,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc) u32 *isp, *prev_esp, arg1; curstk = (struct irq_stack *) current_stack(); - irqstk = __this_cpu_read(hardirq_stack_ptr); + irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr); /* * this is where we switch to the IRQ stack. However, if we are @@ -115,7 +112,7 @@ int irq_init_percpu_irqstack(unsigned int cpu) int node = cpu_to_node(cpu); struct page *ph, *ps; - if (per_cpu(hardirq_stack_ptr, cpu)) + if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) return 0; ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER); @@ -127,8 +124,8 @@ int irq_init_percpu_irqstack(unsigned int cpu) return -ENOMEM; } - per_cpu(hardirq_stack_ptr, cpu) = page_address(ph); - per_cpu(softirq_stack_ptr, cpu) = page_address(ps); + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph); + per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps); return 0; } @@ -138,7 +135,7 @@ void do_softirq_own_stack(void) struct irq_stack *irqstk; u32 *isp, *prev_esp; - irqstk = __this_cpu_read(softirq_stack_ptr); + irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr); /* build the stack frame on the softirq stack */ isp = (u32 *) ((char *)irqstk + sizeof(*irqstk)); diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 1c0fb96b9e39069d4d246f32f5e5c782e8e7be38..fe0c859873d17ef472aa92aac419bc427073c6e9 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -50,7 +50,7 @@ static int map_irq_stack(unsigned int cpu) return -ENOMEM; /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #else @@ -63,14 +63,14 @@ static int map_irq_stack(unsigned int cpu) void *va = per_cpu_ptr(&irq_stack_backing_store, cpu); /* Store actual TOS to avoid adjustment in the hotpath */ - per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; + per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8; return 0; } #endif int irq_init_percpu_irqstack(unsigned int cpu) { - if (per_cpu(hardirq_stack_ptr, cpu)) + if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu)) return 0; return map_irq_stack(cpu); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index eb8bc82846b997031da4529d7009bbc737bee0a9..01b8d956aa76d65f88019c4af717060984699268 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -414,7 +414,6 @@ void *alloc_insn_page(void) if (!page) return NULL; - set_vm_flush_reset_perms(page); /* * First make the page read-only, and only then make it executable to * prevent it from being W+X in between. diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index d4e48b4a438b25d7170890ca2c281d4d96013985..1cceac5984daa902f311a4001ba1a028ab6f91b3 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -349,7 +349,7 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) static void kvm_guest_cpu_init(void) { if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) { - u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); + u64 pa; WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled)); @@ -798,19 +798,13 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long); * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and * restoring to/from the stack. */ -asm( -".pushsection .text;" -".global __raw_callee_save___kvm_vcpu_is_preempted;" -".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" -"__raw_callee_save___kvm_vcpu_is_preempted:" -ASM_ENDBR -"movq __per_cpu_offset(,%rdi,8), %rax;" -"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" -"setne %al;" -ASM_RET -".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;" -".popsection"); +#define PV_VCPU_PREEMPTED_ASM \ + "movq __per_cpu_offset(,%rdi,8), %rax\n\t" \ + "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \ + "setne %al\n\t" +DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted, + PV_VCPU_PREEMPTED_ASM, .text); #endif static void __init kvm_guest_init(void) diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index d85a6980e263d9fc54acc8cdf58b52f3cda46e8f..705fb2a41d7dd54b3fdc1737f8c117180c4b1439 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -74,10 +74,11 @@ void *module_alloc(unsigned long size) return NULL; p = __vmalloc_node_range(size, MODULE_ALIGN, - MODULES_VADDR + get_module_load_offset(), - MODULES_END, gfp_mask, - PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE, - __builtin_return_address(0)); + MODULES_VADDR + get_module_load_offset(), + MODULES_END, gfp_mask, PAGE_KERNEL, + VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK, + NUMA_NO_NODE, __builtin_return_address(0)); + if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); return NULL; @@ -253,7 +254,8 @@ int module_finalize(const Elf_Ehdr *hdr, { const Elf_Shdr *s, *alt = NULL, *locks = NULL, *para = NULL, *orc = NULL, *orc_ip = NULL, - *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL; + *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL, + *calls = NULL, *cfi = NULL; char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { @@ -271,6 +273,10 @@ int module_finalize(const Elf_Ehdr *hdr, retpolines = s; if (!strcmp(".return_sites", secstrings + s->sh_name)) returns = s; + if (!strcmp(".call_sites", secstrings + s->sh_name)) + calls = s; + if (!strcmp(".cfi_sites", secstrings + s->sh_name)) + cfi = s; if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name)) ibt_endbr = s; } @@ -283,6 +289,22 @@ int module_finalize(const Elf_Ehdr *hdr, void *pseg = (void *)para->sh_addr; apply_paravirt(pseg, pseg + para->sh_size); } + if (retpolines || cfi) { + void *rseg = NULL, *cseg = NULL; + unsigned int rsize = 0, csize = 0; + + if (retpolines) { + rseg = (void *)retpolines->sh_addr; + rsize = retpolines->sh_size; + } + + if (cfi) { + cseg = (void *)cfi->sh_addr; + csize = cfi->sh_size; + } + + apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize); + } if (retpolines) { void *rseg = (void *)retpolines->sh_addr; apply_retpolines(rseg, rseg + retpolines->sh_size); @@ -296,6 +318,21 @@ int module_finalize(const Elf_Ehdr *hdr, void *aseg = (void *)alt->sh_addr; apply_alternatives(aseg, aseg + alt->sh_size); } + if (calls || para) { + struct callthunk_sites cs = {}; + + if (calls) { + cs.call_start = (void *)calls->sh_addr; + cs.call_end = (void *)calls->sh_addr + calls->sh_size; + } + + if (para) { + cs.pv_start = (void *)para->sh_addr; + cs.pv_end = (void *)para->sh_addr + para->sh_size; + } + + callthunks_patch_module_calls(&cs, me); + } if (ibt_endbr) { void *iseg = (void *)ibt_endbr->sh_addr; apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size); diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 7ca2d46c08cc9efc7130388da9ea1d8ecedcf94e..327757afb0276c877ec7c01af6570f90221c5385 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -37,27 +37,10 @@ * nop stub, which must not clobber anything *including the stack* to * avoid confusing the entry prologues. */ -extern void _paravirt_nop(void); -asm (".pushsection .entry.text, \"ax\"\n" - ".global _paravirt_nop\n" - "_paravirt_nop:\n\t" - ASM_ENDBR - ASM_RET - ".size _paravirt_nop, . - _paravirt_nop\n\t" - ".type _paravirt_nop, @function\n\t" - ".popsection"); +DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text); /* stub always returning 0. */ -asm (".pushsection .entry.text, \"ax\"\n" - ".global paravirt_ret0\n" - "paravirt_ret0:\n\t" - ASM_ENDBR - "xor %" _ASM_AX ", %" _ASM_AX ";\n\t" - ASM_RET - ".size paravirt_ret0, . - paravirt_ret0\n\t" - ".type paravirt_ret0, @function\n\t" - ".popsection"); - +DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text); void __init default_banner(void) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 2f314b170c9f0737d6ee673ed380d894a6978911..470c128759eab3c82b05539f13e204354452cdc1 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -191,13 +191,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) arch_end_context_switch(next_p); /* - * Reload esp0 and cpu_current_top_of_stack. This changes + * Reload esp0 and pcpu_hot.top_of_stack. This changes * current_thread_info(). Refresh the SYSENTER configuration in * case prev or next is vm86. */ update_task_stack(next_p); refresh_sysenter_cs(next); - this_cpu_write(cpu_current_top_of_stack, + this_cpu_write(pcpu_hot.top_of_stack, (unsigned long)task_stack_page(next_p) + THREAD_SIZE); @@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (prev->gs | next->gs) loadsegment(gs, next->gs); - this_cpu_write(current_task, next_p); + raw_cpu_write(pcpu_hot.current_task, next_p); switch_fpu_finish(); diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e2f469175be8a78a7004abf4d3571cdd6337b5eb..4e34b3b68ebdc96a76c9bf265415ecac3a5314a8 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -563,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) int cpu = smp_processor_id(); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && - this_cpu_read(hardirq_stack_inuse)); + this_cpu_read(pcpu_hot.hardirq_stack_inuse)); if (!test_thread_flag(TIF_NEED_FPU_LOAD)) switch_fpu_prepare(prev_fpu, cpu); @@ -617,8 +617,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* * Switch the PDA and FPU contexts. */ - this_cpu_write(current_task, next_p); - this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p)); + raw_cpu_write(pcpu_hot.current_task, next_p); + raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p)); switch_fpu_finish(); diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S index 4809c0dc4eb0ceb64b8dbab19cf5cc9525352609..4a73351f87f88bdf5924e33b6b7bb92c2367e95e 100644 --- a/arch/x86/kernel/relocate_kernel_64.S +++ b/arch/x86/kernel/relocate_kernel_64.S @@ -41,6 +41,7 @@ .text .align PAGE_SIZE .code64 +SYM_CODE_START_NOALIGN(relocate_range) SYM_CODE_START_NOALIGN(relocate_kernel) UNWIND_HINT_EMPTY ANNOTATE_NOENDBR @@ -312,5 +313,5 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages) int3 SYM_CODE_END(swap_pages) - .globl kexec_control_code_size -.set kexec_control_code_size, . - relocate_kernel + .skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc +SYM_CODE_END(relocate_range); diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c index bba1abd05bfeb1d94144ad2593048e7ddfbe74c0..79bc8a97a083cdaa42b4bf1fa5e9e5b50dd3e8c2 100644 --- a/arch/x86/kernel/resource.c +++ b/arch/x86/kernel/resource.c @@ -42,8 +42,16 @@ static void remove_e820_regions(struct resource *avail) resource_clip(avail, e820_start, e820_end); if (orig.start != avail->start || orig.end != avail->end) { - pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n", - &orig, avail, e820_start, e820_end); + pr_info("resource: avoiding allocation from e820 entry [mem %#010Lx-%#010Lx]\n", + e820_start, e820_end); + if (avail->end > avail->start) + /* + * Use %pa instead of %pR because "avail" + * is typically IORESOURCE_UNSET, so %pR + * shows the size instead of addresses. + */ + pr_info("resource: remaining [mem %pa-%pa] available\n", + &avail->start, &avail->end); orig = *avail; } } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index b26123c90b4fd3dca51f43aeaae46cbfde526295..c242dc47e9cb92a6918592b3a58b0e2a2ba8e26b 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -23,9 +23,6 @@ #include #include -DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number); -EXPORT_PER_CPU_SYMBOL(cpu_number); - #ifdef CONFIG_X86_64 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load) #else @@ -172,7 +169,7 @@ void __init setup_per_cpu_areas(void) for_each_possible_cpu(cpu) { per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); - per_cpu(cpu_number, cpu) = cpu; + per_cpu(pcpu_hot.cpu_number, cpu) = cpu; setup_percpu_segment(cpu); /* * Copy data used in early init routines from the @@ -211,7 +208,7 @@ void __init setup_per_cpu_areas(void) * area. Reload any changed state for the boot CPU. */ if (!cpu) - switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu); } /* indicate the early static arrays will soon be gone */ diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c295f0e008a3438b13d135566b6d2054c0792c42..55cad72715d991bd18bf0dfb7aa3bd757f4474f7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1048,7 +1048,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) /* Just in case we booted with a single CPU. */ alternatives_enable_smp(); - per_cpu(current_task, cpu) = idle; + per_cpu(pcpu_hot.current_task, cpu) = idle; cpu_init_stack_canary(cpu, idle); /* Initialize the interrupt stack(s) */ @@ -1058,7 +1058,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) #ifdef CONFIG_X86_32 /* Stack for startup_32 can be just as for start_secondary onwards */ - per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle); + per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle); #else initial_gs = per_cpu_offset(cpu); #endif @@ -1453,7 +1453,11 @@ void arch_thaw_secondary_cpus_end(void) void __init native_smp_prepare_boot_cpu(void) { int me = smp_processor_id(); - switch_to_new_gdt(me); + + /* SMP handles this from setup_per_cpu_areas() */ + if (!IS_ENABLED(CONFIG_SMP)) + switch_gdt_and_percpu_base(me); + /* already set me in cpu_online_mask in boot_cpu_init() */ cpumask_set_cpu(me, cpu_callout_mask); cpu_set_state_online(me); diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index aaaba85d6d7ff01e596f8b77c42ac72c707b0f0d..2ebc338980bcdfe5a276ac79fbbe2a90ecc6815c 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -34,6 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, switch (type) { case CALL: + func = callthunks_translate_call_dest(func); code = text_gen_insn(CALL_INSN_OPCODE, insn, func); if (func == &__static_call_return0) { emulate = code; @@ -52,7 +53,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type, case RET: if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) - code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk); + code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk); else code = &retinsn; break; diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d1e1679f32cf42c72fe423529f4081d29c37ffb8..d317dc3d06a3a3f018f83e0fedfe0c898017e0d5 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -858,7 +858,7 @@ DEFINE_IDTENTRY_RAW(exc_int3) */ asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs) { - struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1; + struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1; if (regs != eregs) *regs = *eregs; return regs; @@ -876,7 +876,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r * trust it and switch to the current kernel stack */ if (ip_within_syscall_gap(regs)) { - sp = this_cpu_read(cpu_current_top_of_stack); + sp = this_cpu_read(pcpu_hot.top_of_stack); goto sync; } diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c index c059820dfaeaf6af5167051e084a9dc8253f01a4..cdf6c6060170008d2e2bf18d6ba7d2703776dc29 100644 --- a/arch/x86/kernel/unwind_orc.c +++ b/arch/x86/kernel/unwind_orc.c @@ -136,6 +136,21 @@ static struct orc_entry null_orc_entry = { .type = UNWIND_HINT_TYPE_CALL }; +#ifdef CONFIG_CALL_THUNKS +static struct orc_entry *orc_callthunk_find(unsigned long ip) +{ + if (!is_callthunk((void *)ip)) + return NULL; + + return &null_orc_entry; +} +#else +static struct orc_entry *orc_callthunk_find(unsigned long ip) +{ + return NULL; +} +#endif + /* Fake frame pointer entry -- used as a fallback for generated code */ static struct orc_entry orc_fp_entry = { .type = UNWIND_HINT_TYPE_CALL, @@ -189,7 +204,11 @@ static struct orc_entry *orc_find(unsigned long ip) if (orc) return orc; - return orc_ftrace_find(ip); + orc = orc_ftrace_find(ip); + if (orc) + return orc; + + return orc_callthunk_find(ip); } #ifdef CONFIG_MODULES diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 15f29053cec46e462aa01c84c2b1f33e7c89ceaf..2e0ee14229bffce61924e7d1b689864f728eda5b 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -132,18 +132,19 @@ SECTIONS CPUIDLE_TEXT LOCK_TEXT KPROBES_TEXT - ALIGN_ENTRY_TEXT_BEGIN - ENTRY_TEXT - ALIGN_ENTRY_TEXT_END SOFTIRQENTRY_TEXT - STATIC_CALL_TEXT - *(.gnu.warning) - #ifdef CONFIG_RETPOLINE __indirect_thunk_start = .; *(.text.__x86.*) __indirect_thunk_end = .; #endif + STATIC_CALL_TEXT + + ALIGN_ENTRY_TEXT_BEGIN + ENTRY_TEXT + ALIGN_ENTRY_TEXT_END + *(.gnu.warning) + } :text =0xcccc /* End of text section, which should occupy whole number of pages */ @@ -290,6 +291,13 @@ SECTIONS *(.return_sites) __return_sites_end = .; } + + . = ALIGN(8); + .call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) { + __call_sites = .; + *(.call_sites) + __call_sites_end = .; + } #endif #ifdef CONFIG_X86_KERNEL_IBT @@ -301,6 +309,15 @@ SECTIONS } #endif +#ifdef CONFIG_FINEIBT + . = ALIGN(8); + .cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) { + __cfi_sites = .; + *(.cfi_sites) + __cfi_sites_end = .; + } +#endif + /* * struct alt_inst entries. From the header (alternative.h): * "Alternative instructions for different CPU types or capabilities" @@ -493,11 +510,3 @@ INIT_PER_CPU(irq_stack_backing_store); #endif #endif /* CONFIG_X86_64 */ - -#ifdef CONFIG_KEXEC_CORE -#include - -. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE, - "kexec control code size is too big"); -#endif - diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 67be7f217e37bdad646c288ee3a3b18ceba5939b..fbeaa9ddef5985f193afccc58a0dc36c1eeb0103 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -118,6 +118,17 @@ config KVM_AMD_SEV Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. +config KVM_SMM + bool "System Management Mode emulation" + default y + depends on KVM + help + Provides support for KVM to emulate System Management Mode (SMM) + in virtual machines. This can be used by the virtual machine + firmware to implement UEFI secure boot. + + If unsure, say Y. + config KVM_XEN bool "Support for Xen hypercall interface" depends on KVM diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index f453a0f96e243e806bdc8b96757dd8a22757573b..80e3fe184d17e64984d2f2d4adba00ec9d5ad57e 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -20,12 +20,14 @@ endif kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o kvm-$(CONFIG_KVM_XEN) += xen.o +kvm-$(CONFIG_KVM_SMM) += smm.o kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ - vmx/evmcs.o vmx/nested.o vmx/posted_intr.o + vmx/hyperv.o vmx/nested.o vmx/posted_intr.o kvm-intel-$(CONFIG_X86_SGX_KVM) += vmx/sgx.o -kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o +kvm-amd-y += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \ + svm/sev.o svm/hyperv.o ifdef CONFIG_HYPERV kvm-amd-y += svm/svm_onhyperv.o diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c92c49a0b35b048f6874ee6115bfcc2ddba48563..b14653b61470c2720267006f7e7166ebac159613 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -62,10 +62,16 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted) * This one is tied to SSB in the user API, and not * visible in /proc/cpuinfo. */ -#define KVM_X86_FEATURE_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ +#define KVM_X86_FEATURE_AMD_PSFD (13*32+28) /* Predictive Store Forwarding Disable */ #define F feature_bit -#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0) + +/* Scattered Flag - For features that are scattered by cpufeatures.h. */ +#define SF(name) \ +({ \ + BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES); \ + (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0); \ +}) /* * Magic value used by KVM when querying userspace-provided CPUID entries and @@ -543,9 +549,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf) } static __always_inline -void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) +void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_mask for non-scattered leafs. */ + /* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */ BUILD_BUG_ON(leaf < NCAPINTS); kvm_cpu_caps[leaf] = mask; @@ -555,7 +561,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask) static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask) { - /* Use kvm_cpu_cap_init_scattered for scattered leafs. */ + /* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */ BUILD_BUG_ON(leaf >= NCAPINTS); kvm_cpu_caps[leaf] &= mask; @@ -657,14 +663,19 @@ void kvm_set_cpu_caps(void) kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD); kvm_cpu_cap_mask(CPUID_7_1_EAX, - F(AVX_VNNI) | F(AVX512_BF16) + F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) | + F(AVX_IFMA) + ); + + kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX, + F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI) ); kvm_cpu_cap_mask(CPUID_D_1_EAX, F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd ); - kvm_cpu_cap_init_scattered(CPUID_12_EAX, + kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX, SF(SGX1) | SF(SGX2) | SF(SGX_EDECCSSA) ); @@ -694,7 +705,7 @@ void kvm_set_cpu_caps(void) F(CLZERO) | F(XSAVEERPTR) | F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) | F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) | - __feature_bit(KVM_X86_FEATURE_PSFD) + __feature_bit(KVM_X86_FEATURE_AMD_PSFD) ); /* @@ -913,9 +924,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) goto out; cpuid_entry_override(entry, CPUID_7_1_EAX); + cpuid_entry_override(entry, CPUID_7_1_EDX); entry->ebx = 0; entry->ecx = 0; - entry->edx = 0; } break; case 0xa: { /* Architectural Performance Monitoring */ @@ -1220,8 +1231,12 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) * Other defined bits are for MSRs that KVM does not expose: * EAX 3 SPCL, SMM page configuration lock * EAX 13 PCMSR, Prefetch control MSR + * + * KVM doesn't support SMM_CTL. + * EAX 9 SMM_CTL MSR is not supported */ entry->eax &= BIT(0) | BIT(2) | BIT(6); + entry->eax |= BIT(9); if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC)) entry->eax |= BIT(2); if (!static_cpu_has_bug(X86_BUG_NULL_SEG)) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4a43261d25a2aabc9dc3e2a00d09cd102151f8e0..5cc3efa0e21c17632de33946955bd9a9a6a72238 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -242,37 +242,6 @@ enum x86_transfer_type { X86_TRANSFER_TASK_SWITCH, }; -static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) - nr &= NR_EMULATOR_GPRS - 1; - - if (!(ctxt->regs_valid & (1 << nr))) { - ctxt->regs_valid |= 1 << nr; - ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); - } - return ctxt->_regs[nr]; -} - -static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) - nr &= NR_EMULATOR_GPRS - 1; - - BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS); - BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS); - - ctxt->regs_valid |= 1 << nr; - ctxt->regs_dirty |= 1 << nr; - return &ctxt->_regs[nr]; -} - -static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) -{ - reg_read(ctxt, nr); - return reg_write(ctxt, nr); -} - static void writeback_registers(struct x86_emulate_ctxt *ctxt) { unsigned long dirty = ctxt->regs_dirty; @@ -2338,335 +2307,15 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) return rc; } -static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt) -{ -#ifdef CONFIG_X86_64 - return ctxt->ops->guest_has_long_mode(ctxt); -#else - return false; -#endif -} - -static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags) -{ - desc->g = (flags >> 23) & 1; - desc->d = (flags >> 22) & 1; - desc->l = (flags >> 21) & 1; - desc->avl = (flags >> 20) & 1; - desc->p = (flags >> 15) & 1; - desc->dpl = (flags >> 13) & 3; - desc->s = (flags >> 12) & 1; - desc->type = (flags >> 8) & 15; -} - -static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate, - int n) -{ - struct desc_struct desc; - int offset; - u16 selector; - - selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); - - if (n < 3) - offset = 0x7f84 + n * 12; - else - offset = 0x7f2c + (n - 3) * 12; - - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, n); - return X86EMUL_CONTINUE; -} - -#ifdef CONFIG_X86_64 -static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate, - int n) -{ - struct desc_struct desc; - int offset; - u16 selector; - u32 base3; - - offset = 0x7e00 + n * 16; - - selector = GET_SMSTATE(u16, smstate, offset); - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, offset + 4)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, offset + 8)); - base3 = GET_SMSTATE(u32, smstate, offset + 12); - - ctxt->ops->set_segment(ctxt, selector, &desc, base3, n); - return X86EMUL_CONTINUE; -} -#endif - -static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt, - u64 cr0, u64 cr3, u64 cr4) -{ - int bad; - u64 pcid; - - /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ - pcid = 0; - if (cr4 & X86_CR4_PCIDE) { - pcid = cr3 & 0xfff; - cr3 &= ~0xfff; - } - - bad = ctxt->ops->set_cr(ctxt, 3, cr3); - if (bad) - return X86EMUL_UNHANDLEABLE; - - /* - * First enable PAE, long mode needs it before CR0.PG = 1 is set. - * Then enable protected mode. However, PCID cannot be enabled - * if EFER.LMA=0, so set it separately. - */ - bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - if (bad) - return X86EMUL_UNHANDLEABLE; - - bad = ctxt->ops->set_cr(ctxt, 0, cr0); - if (bad) - return X86EMUL_UNHANDLEABLE; - - if (cr4 & X86_CR4_PCIDE) { - bad = ctxt->ops->set_cr(ctxt, 4, cr4); - if (bad) - return X86EMUL_UNHANDLEABLE; - if (pcid) { - bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid); - if (bad) - return X86EMUL_UNHANDLEABLE; - } - - } - - return X86EMUL_CONTINUE; -} - -static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - struct desc_struct desc; - struct desc_ptr dt; - u16 selector; - u32 val, cr0, cr3, cr4; - int i; - - cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); - cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; - ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); - - for (i = 0; i < 8; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); - - val = GET_SMSTATE(u32, smstate, 0x7fcc); - - if (ctxt->ops->set_dr(ctxt, 6, val)) - return X86EMUL_UNHANDLEABLE; - - val = GET_SMSTATE(u32, smstate, 0x7fc8); - - if (ctxt->ops->set_dr(ctxt, 7, val)) - return X86EMUL_UNHANDLEABLE; - - selector = GET_SMSTATE(u32, smstate, 0x7fc4); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f64)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f60)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR); - - selector = GET_SMSTATE(u32, smstate, 0x7fc0); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7f80)); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7f7c)); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); - ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR); - - dt.address = GET_SMSTATE(u32, smstate, 0x7f74); - dt.size = GET_SMSTATE(u32, smstate, 0x7f70); - ctxt->ops->set_gdt(ctxt, &dt); - - dt.address = GET_SMSTATE(u32, smstate, 0x7f58); - dt.size = GET_SMSTATE(u32, smstate, 0x7f54); - ctxt->ops->set_idt(ctxt, &dt); - - for (i = 0; i < 6; i++) { - int r = rsm_load_seg_32(ctxt, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } - - cr4 = GET_SMSTATE(u32, smstate, 0x7f14); - - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8)); - - return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); -} - -#ifdef CONFIG_X86_64 -static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - struct desc_struct desc; - struct desc_ptr dt; - u64 val, cr0, cr3, cr4; - u32 base3; - u16 selector; - int i, r; - - for (i = 0; i < 16; i++) - *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); - - ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; - - val = GET_SMSTATE(u64, smstate, 0x7f68); - - if (ctxt->ops->set_dr(ctxt, 6, val)) - return X86EMUL_UNHANDLEABLE; - - val = GET_SMSTATE(u64, smstate, 0x7f60); - - if (ctxt->ops->set_dr(ctxt, 7, val)) - return X86EMUL_UNHANDLEABLE; - - cr0 = GET_SMSTATE(u64, smstate, 0x7f58); - cr3 = GET_SMSTATE(u64, smstate, 0x7f50); - cr4 = GET_SMSTATE(u64, smstate, 0x7f48); - ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00)); - val = GET_SMSTATE(u64, smstate, 0x7ed0); - - if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA)) - return X86EMUL_UNHANDLEABLE; - - selector = GET_SMSTATE(u32, smstate, 0x7e90); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e94)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e98)); - base3 = GET_SMSTATE(u32, smstate, 0x7e9c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR); - - dt.size = GET_SMSTATE(u32, smstate, 0x7e84); - dt.address = GET_SMSTATE(u64, smstate, 0x7e88); - ctxt->ops->set_idt(ctxt, &dt); - - selector = GET_SMSTATE(u32, smstate, 0x7e70); - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); - set_desc_limit(&desc, GET_SMSTATE(u32, smstate, 0x7e74)); - set_desc_base(&desc, GET_SMSTATE(u32, smstate, 0x7e78)); - base3 = GET_SMSTATE(u32, smstate, 0x7e7c); - ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR); - - dt.size = GET_SMSTATE(u32, smstate, 0x7e64); - dt.address = GET_SMSTATE(u64, smstate, 0x7e68); - ctxt->ops->set_gdt(ctxt, &dt); - - r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4); - if (r != X86EMUL_CONTINUE) - return r; - - for (i = 0; i < 6; i++) { - r = rsm_load_seg_64(ctxt, smstate, i); - if (r != X86EMUL_CONTINUE) - return r; - } - - return X86EMUL_CONTINUE; -} -#endif - static int em_rsm(struct x86_emulate_ctxt *ctxt) { - unsigned long cr0, cr4, efer; - char buf[512]; - u64 smbase; - int ret; - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) return emulate_ud(ctxt); - smbase = ctxt->ops->get_smbase(ctxt); - - ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf)); - if (ret != X86EMUL_CONTINUE) - return X86EMUL_UNHANDLEABLE; - - if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0) - ctxt->ops->set_nmi_mask(ctxt, false); - - ctxt->ops->exiting_smm(ctxt); - - /* - * Get back to real mode, to prepare a safe state in which to load - * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU - * supports long mode. - */ - if (emulator_has_longmode(ctxt)) { - struct desc_struct cs_desc; - - /* Zero CR4.PCIDE before CR0.PG. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_PCIDE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE); - - /* A 32-bit code segment is required to clear EFER.LMA. */ - memset(&cs_desc, 0, sizeof(cs_desc)); - cs_desc.type = 0xb; - cs_desc.s = cs_desc.g = cs_desc.p = 1; - ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS); - } - - /* For the 64-bit case, this will clear EFER.LMA. */ - cr0 = ctxt->ops->get_cr(ctxt, 0); - if (cr0 & X86_CR0_PE) - ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); - - if (emulator_has_longmode(ctxt)) { - /* Clear CR4.PAE before clearing EFER.LME. */ - cr4 = ctxt->ops->get_cr(ctxt, 4); - if (cr4 & X86_CR4_PAE) - ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE); - - /* And finally go back to 32-bit mode. */ - efer = 0; - ctxt->ops->set_msr(ctxt, MSR_EFER, efer); - } - - /* - * Give leave_smm() a chance to make ISA-specific changes to the vCPU - * state (e.g. enter guest mode) before loading state from the SMM - * state-save area. - */ - if (ctxt->ops->leave_smm(ctxt, buf)) - goto emulate_shutdown; - -#ifdef CONFIG_X86_64 - if (emulator_has_longmode(ctxt)) - ret = rsm_load_state_64(ctxt, buf); - else -#endif - ret = rsm_load_state_32(ctxt, buf); - - if (ret != X86EMUL_CONTINUE) - goto emulate_shutdown; + if (ctxt->ops->leave_smm(ctxt)) + ctxt->ops->triple_fault(ctxt); - /* - * Note, the ctxt->ops callbacks are responsible for handling side - * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID - * runtime updates, etc... If that changes, e.g. this flow is moved - * out of the emulator to make it look more like enter_smm(), then - * those side effects need to be explicitly handled for both success - * and shutdown. - */ return emulator_recalc_and_set_mode(ctxt); - -emulate_shutdown: - ctxt->ops->triple_fault(ctxt); - return X86EMUL_CONTINUE; } static void diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 0adf4a437e85f6f7e67b7b281ebc2484b680ee2f..2c7f2a26421e825097982d800a03d09435e9917a 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -23,22 +23,25 @@ #include "ioapic.h" #include "cpuid.h" #include "hyperv.h" +#include "mmu.h" #include "xen.h" #include #include #include #include +#include #include #include +#include #include #include "trace.h" #include "irq.h" #include "fpu.h" -#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64) +#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK) static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer, bool vcpu_kick); @@ -897,13 +900,15 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled); -bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, - struct hv_vp_assist_page *assist_page) +int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu) { - if (!kvm_hv_assist_page_enabled(vcpu)) - return false; - return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, - assist_page, sizeof(*assist_page)); + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu || !kvm_hv_assist_page_enabled(vcpu)) + return -EFAULT; + + return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, + &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page)); } EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page); @@ -954,6 +959,11 @@ int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu) hv_vcpu->vp_index = vcpu->vcpu_idx; + for (i = 0; i < HV_NR_TLB_FLUSH_FIFOS; i++) { + INIT_KFIFO(hv_vcpu->tlb_flush_fifo[i].entries); + spin_lock_init(&hv_vcpu->tlb_flush_fifo[i].write_lock); + } + return 0; } @@ -1736,6 +1746,28 @@ static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks, } } +static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_banks[]) +{ + int valid_bit_nr = vp_id / HV_VCPUS_PER_SPARSE_BANK; + unsigned long sbank; + + if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask)) + return false; + + /* + * The index into the sparse bank is the number of preceding bits in + * the valid mask. Optimize for VMs with <64 vCPUs by skipping the + * fancy math if there can't possibly be preceding bits. + */ + if (valid_bit_nr) + sbank = hweight64(valid_bank_mask & GENMASK_ULL(valid_bit_nr - 1, 0)); + else + sbank = 0; + + return test_bit(vp_id % HV_VCPUS_PER_SPARSE_BANK, + (unsigned long *)&sparse_banks[sbank]); +} + struct kvm_hv_hcall { u64 param; u64 ingpa; @@ -1749,57 +1781,173 @@ struct kvm_hv_hcall { sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS]; }; -static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, - int consumed_xmm_halves, - u64 *sparse_banks, gpa_t offset) -{ - u16 var_cnt; - int i; - - if (hc->var_cnt > 64) - return -EINVAL; - /* Ignore banks that cannot possibly contain a legal VP index. */ - var_cnt = min_t(u16, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS); +static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc, + u16 orig_cnt, u16 cnt_cap, u64 *data, + int consumed_xmm_halves, gpa_t offset) +{ + /* + * Preserve the original count when ignoring entries via a "cap", KVM + * still needs to validate the guest input (though the non-XMM path + * punts on the checks). + */ + u16 cnt = min(orig_cnt, cnt_cap); + int i, j; if (hc->fast) { /* * Each XMM holds two sparse banks, but do not count halves that * have already been consumed for hypercall parameters. */ - if (hc->var_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves) + if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves) return HV_STATUS_INVALID_HYPERCALL_INPUT; - for (i = 0; i < var_cnt; i++) { - int j = i + consumed_xmm_halves; + + for (i = 0; i < cnt; i++) { + j = i + consumed_xmm_halves; if (j % 2) - sparse_banks[i] = sse128_hi(hc->xmm[j / 2]); + data[i] = sse128_hi(hc->xmm[j / 2]); else - sparse_banks[i] = sse128_lo(hc->xmm[j / 2]); + data[i] = sse128_lo(hc->xmm[j / 2]); } return 0; } - return kvm_read_guest(kvm, hc->ingpa + offset, sparse_banks, - var_cnt * sizeof(*sparse_banks)); + return kvm_read_guest(kvm, hc->ingpa + offset, data, + cnt * sizeof(*data)); +} + +static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc, + u64 *sparse_banks, int consumed_xmm_halves, + gpa_t offset) +{ + if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS) + return -EINVAL; + + /* Cap var_cnt to ignore banks that cannot contain a legal VP index. */ + return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS, + sparse_banks, consumed_xmm_halves, offset); +} + +static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[], + int consumed_xmm_halves, gpa_t offset) +{ + return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, + entries, consumed_xmm_halves, offset); +} + +static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo, + u64 *entries, int count) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY; + + if (!hv_vcpu) + return; + + spin_lock(&tlb_flush_fifo->write_lock); + + /* + * All entries should fit on the fifo leaving one free for 'flush all' + * entry in case another request comes in. In case there's not enough + * space, just put 'flush all' entry there. + */ + if (count && entries && count < kfifo_avail(&tlb_flush_fifo->entries)) { + WARN_ON(kfifo_in(&tlb_flush_fifo->entries, entries, count) != count); + goto out_unlock; + } + + /* + * Note: full fifo always contains 'flush all' entry, no need to check the + * return value. + */ + kfifo_in(&tlb_flush_fifo->entries, &flush_all_entry, 1); + +out_unlock: + spin_unlock(&tlb_flush_fifo->write_lock); +} + +int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 entries[KVM_HV_TLB_FLUSH_FIFO_SIZE]; + int i, j, count; + gva_t gva; + + if (!tdp_enabled || !hv_vcpu) + return -EINVAL; + + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu)); + + count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE); + + for (i = 0; i < count; i++) { + if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY) + goto out_flush_all; + + /* + * Lower 12 bits of 'address' encode the number of additional + * pages to flush. + */ + gva = entries[i] & PAGE_MASK; + for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++) + static_call(kvm_x86_flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE); + + ++vcpu->stat.tlb_flush; + } + return 0; + +out_flush_all: + kfifo_reset_out(&tlb_flush_fifo->entries); + + /* Fall back to full flush. */ + return -ENOSPC; } static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_tlb_flush_ex flush_ex; struct hv_tlb_flush flush; DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; + /* + * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE' + * entries on the TLB flush fifo. The last entry, however, needs to be + * always left free for 'flush all' entry which gets placed when + * there is not enough space to put all the requested entries. + */ + u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1]; + u64 *tlb_flush_entries; u64 valid_bank_mask; - u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; + struct kvm_vcpu *v; + unsigned long i; bool all_cpus; + int consumed_xmm_halves = 0; + gpa_t data_offset; /* - * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the - * valid mask is a u64. Fail the build if KVM's max allowed number of - * vCPUs (>4096) would exceed this limit, KVM will additional changes - * for Hyper-V support to avoid setting the guest up to fail. + * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS + * sparse banks. Fail the build if KVM's max allowed number of + * vCPUs (>4096) exceeds this limit. */ - BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > 64); + BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > HV_MAX_SPARSE_VCPU_BANKS); + + /* + * 'Slow' hypercall's first parameter is the address in guest's memory + * where hypercall parameters are placed. This is either a GPA or a + * nested GPA when KVM is handling the call from L2 ('direct' TLB + * flush). Translate the address here so the memory can be uniformly + * read with kvm_read_guest(). + */ + if (!hc->fast && is_guest_mode(vcpu)) { + hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL); + if (unlikely(hc->ingpa == INVALID_GPA)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST || hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) { @@ -1807,14 +1955,17 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) flush.address_space = hc->ingpa; flush.flags = hc->outgpa; flush.processor_mask = sse128_lo(hc->xmm[0]); + consumed_xmm_halves = 1; } else { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush, sizeof(flush)))) return HV_STATUS_INVALID_HYPERCALL_INPUT; + data_offset = sizeof(flush); } trace_kvm_hv_flush_tlb(flush.processor_mask, - flush.address_space, flush.flags); + flush.address_space, flush.flags, + is_guest_mode(vcpu)); valid_bank_mask = BIT_ULL(0); sparse_banks[0] = flush.processor_mask; @@ -1834,16 +1985,18 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) flush_ex.flags = hc->outgpa; memcpy(&flush_ex.hv_vp_set, &hc->xmm[0], sizeof(hc->xmm[0])); + consumed_xmm_halves = 2; } else { if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex, sizeof(flush_ex)))) return HV_STATUS_INVALID_HYPERCALL_INPUT; + data_offset = sizeof(flush_ex); } trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask, flush_ex.hv_vp_set.format, flush_ex.address_space, - flush_ex.flags); + flush_ex.flags, is_guest_mode(vcpu)); valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask; all_cpus = flush_ex.hv_vp_set.format != @@ -1852,29 +2005,95 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (hc->var_cnt != hweight64(valid_bank_mask)) return HV_STATUS_INVALID_HYPERCALL_INPUT; - if (all_cpus) - goto do_flush; + if (!all_cpus) { + if (!hc->var_cnt) + goto ret_success; - if (!hc->var_cnt) - goto ret_success; + if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, + consumed_xmm_halves, data_offset)) + return HV_STATUS_INVALID_HYPERCALL_INPUT; + } - if (kvm_get_sparse_vp_set(kvm, hc, 2, sparse_banks, - offsetof(struct hv_tlb_flush_ex, - hv_vp_set.bank_contents))) + /* + * Hyper-V TLFS doesn't explicitly forbid non-empty sparse vCPU + * banks (and, thus, non-zero 'var_cnt') for the 'all vCPUs' + * case (HV_GENERIC_SET_ALL). Always adjust data_offset and + * consumed_xmm_halves to make sure TLB flush entries are read + * from the correct offset. + */ + data_offset += hc->var_cnt * sizeof(sparse_banks[0]); + consumed_xmm_halves += hc->var_cnt; + } + + if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE || + hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX || + hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) { + tlb_flush_entries = NULL; + } else { + if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries, + consumed_xmm_halves, data_offset)) return HV_STATUS_INVALID_HYPERCALL_INPUT; + tlb_flush_entries = __tlb_flush_entries; } -do_flush: /* * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't * analyze it here, flush TLB regardless of the specified address space. */ - if (all_cpus) { - kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST); - } else { + if (all_cpus && !is_guest_mode(vcpu)) { + kvm_for_each_vcpu(i, v, kvm) { + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false); + hv_tlb_flush_enqueue(v, tlb_flush_fifo, + tlb_flush_entries, hc->rep_cnt); + } + + kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH); + } else if (!is_guest_mode(vcpu)) { sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); - kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, vcpu_mask); + for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) { + v = kvm_get_vcpu(kvm, i); + if (!v) + continue; + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false); + hv_tlb_flush_enqueue(v, tlb_flush_fifo, + tlb_flush_entries, hc->rep_cnt); + } + + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); + } else { + struct kvm_vcpu_hv *hv_v; + + bitmap_zero(vcpu_mask, KVM_MAX_VCPUS); + + kvm_for_each_vcpu(i, v, kvm) { + hv_v = to_hv_vcpu(v); + + /* + * The following check races with nested vCPUs entering/exiting + * and/or migrating between L1's vCPUs, however the only case when + * KVM *must* flush the TLB is when the target L2 vCPU keeps + * running on the same L1 vCPU from the moment of the request until + * kvm_hv_flush_tlb() returns. TLB is fully flushed in all other + * cases, e.g. when the target L2 vCPU migrates to a different L1 + * vCPU or when the corresponding L1 vCPU temporary switches to a + * different L2 vCPU while the request is being processed. + */ + if (!hv_v || hv_v->nested.vm_id != hv_vcpu->nested.vm_id) + continue; + + if (!all_cpus && + !hv_is_vp_in_sparse_set(hv_v->nested.vp_id, valid_bank_mask, + sparse_banks)) + continue; + + __set_bit(i, vcpu_mask); + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, true); + hv_tlb_flush_enqueue(v, tlb_flush_fifo, + tlb_flush_entries, hc->rep_cnt); + } + + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask); } ret_success: @@ -1883,8 +2102,8 @@ ret_success: ((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET); } -static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, - unsigned long *vcpu_bitmap) +static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector, + u64 *sparse_banks, u64 valid_bank_mask) { struct kvm_lapic_irq irq = { .delivery_mode = APIC_DM_FIXED, @@ -1894,7 +2113,9 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, unsigned long i; kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu_bitmap && !test_bit(i, vcpu_bitmap)) + if (sparse_banks && + !hv_is_vp_in_sparse_set(kvm_hv_get_vpindex(vcpu), + valid_bank_mask, sparse_banks)) continue; /* We fail only when APIC is disabled */ @@ -1904,12 +2125,12 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector, static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u64 *sparse_banks = hv_vcpu->sparse_banks; struct kvm *kvm = vcpu->kvm; struct hv_send_ipi_ex send_ipi_ex; struct hv_send_ipi send_ipi; - DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS); u64 valid_bank_mask; - u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS]; u32 vector; bool all_cpus; @@ -1959,7 +2180,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc) if (!hc->var_cnt) goto ret_success; - if (kvm_get_sparse_vp_set(kvm, hc, 1, sparse_banks, + if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1, offsetof(struct hv_send_ipi_ex, vp_set.bank_contents))) return HV_STATUS_INVALID_HYPERCALL_INPUT; @@ -1969,13 +2190,10 @@ check_and_send_ipi: if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) return HV_STATUS_INVALID_HYPERCALL_INPUT; - if (all_cpus) { - kvm_send_ipi_to_many(kvm, vector, NULL); - } else { - sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask); - - kvm_send_ipi_to_many(kvm, vector, vcpu_mask); - } + if (all_cpus) + kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0); + else + kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask); ret_success: return HV_STATUS_SUCCESS; @@ -2062,10 +2280,25 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { + u32 tlb_lock_count = 0; + int ret; + + if (hv_result_success(result) && is_guest_mode(vcpu) && + kvm_hv_is_tlb_flush_hcall(vcpu) && + kvm_read_guest(vcpu->kvm, to_hv_vcpu(vcpu)->nested.pa_page_gpa, + &tlb_lock_count, sizeof(tlb_lock_count))) + result = HV_STATUS_INVALID_HYPERCALL_INPUT; + trace_kvm_hv_hypercall_done(result); kvm_hv_hypercall_set_result(vcpu, result); ++vcpu->stat.hypercalls; - return kvm_skip_emulated_instruction(vcpu); + + ret = kvm_skip_emulated_instruction(vcpu); + + if (tlb_lock_count) + kvm_x86_ops.nested_ops->hv_inject_synthetic_vmexit_post_tlb_flush(vcpu); + + return ret; } static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) @@ -2502,6 +2735,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->ebx |= HV_DEBUGGING; ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE; ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; + ent->edx |= HV_FEATURE_EXT_GVA_RANGES_FLUSH; /* * Direct Synthetic timers only make sense with in-kernel @@ -2545,6 +2779,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, case HYPERV_CPUID_NESTED_FEATURES: ent->eax = evmcs_ver; + ent->eax |= HV_X64_NESTED_DIRECT_FLUSH; ent->eax |= HV_X64_NESTED_MSR_BITMAP; ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL; break; diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 1030b1b50552373666a59d371e99202389abfab0..9f96414a31c52d2a514582422855d5149d1ebfc7 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -22,6 +22,7 @@ #define __ARCH_X86_KVM_HYPERV_H__ #include +#include "x86.h" /* "Hv#1" signature */ #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648 @@ -107,8 +108,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages); void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu); bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu); -bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu, - struct hv_vp_assist_page *assist_page); +int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu); static inline struct kvm_vcpu_hv_stimer *to_hv_stimer(struct kvm_vcpu *vcpu, int timer_index) @@ -151,4 +151,64 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args); int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, struct kvm_cpuid_entry2 __user *entries); +static inline struct kvm_vcpu_hv_tlb_flush_fifo *kvm_hv_get_tlb_flush_fifo(struct kvm_vcpu *vcpu, + bool is_guest_mode) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + int i = is_guest_mode ? HV_L2_TLB_FLUSH_FIFO : + HV_L1_TLB_FLUSH_FIFO; + + return &hv_vcpu->tlb_flush_fifo[i]; +} + +static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo; + + if (!to_hv_vcpu(vcpu) || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + return; + + tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu)); + + kfifo_reset_out(&tlb_flush_fifo->entries); +} + +static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + return hv_vcpu && + (hv_vcpu->cpuid_cache.nested_eax & HV_X64_NESTED_DIRECT_FLUSH); +} + +static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + u16 code; + + if (!hv_vcpu) + return false; + + code = is_64_bit_hypercall(vcpu) ? kvm_rcx_read(vcpu) : + kvm_rax_read(vcpu); + + return (code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE || + code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST || + code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX || + code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX); +} + +static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu) +{ + if (!to_hv_vcpu(vcpu)) + return 0; + + if (!kvm_hv_assist_page_enabled(vcpu)) + return 0; + + return kvm_hv_get_assist_page(vcpu); +} + +int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu); + #endif diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index f371f1292ca3e0dcd3cf05132ce4fbf7153949ba..a70952eca9058196e1bd0b49f2c10be76637dd2b 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -31,7 +31,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return r; } -EXPORT_SYMBOL(kvm_cpu_has_pending_timer); /* * check if there is a pending userspace external interrupt @@ -150,7 +149,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu) if (kvm_xen_timer_enabled(vcpu)) kvm_xen_inject_timer_irqs(vcpu); } -EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs); void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { @@ -165,3 +163,8 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm); } + +bool kvm_arch_irqchip_in_kernel(struct kvm *kvm) +{ + return irqchip_in_kernel(kvm); +} diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 3febc342360cc7524b6c1665ee7621a1d0e1228e..c09174f73a344f79d61967ed2e2e5ca55c4d5e9b 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -200,9 +200,4 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu) return vcpu->arch.hflags & HF_GUEST_MASK; } -static inline bool is_smm(struct kvm_vcpu *vcpu) -{ - return vcpu->arch.hflags & HF_SMM_MASK; -} - #endif diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 89246446d6aa9d5254a603a29163f7aa62547896..2d9662be8333781c8851cd779869b680bd39b1b1 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -116,16 +116,6 @@ struct x86_emulate_ops { unsigned int bytes, struct x86_exception *fault, bool system); - /* - * read_phys: Read bytes of standard (non-emulated/special) memory. - * Used for descriptor reading. - * @addr: [IN ] Physical address from which to read. - * @val: [OUT] Value read from memory. - * @bytes: [IN ] Number of bytes to read from memory. - */ - int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr, - void *val, unsigned int bytes); - /* * write_std: Write bytes of standard (non-emulated/special) memory. * Used for descriptor writing. @@ -209,11 +199,8 @@ struct x86_emulate_ops { int (*cpl)(struct x86_emulate_ctxt *ctxt); void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest); int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value); - u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt); - void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase); int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); - int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data); int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata); int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc); int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata); @@ -234,8 +221,7 @@ struct x86_emulate_ops { void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked); unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt); - void (*exiting_smm)(struct x86_emulate_ctxt *ctxt); - int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate); + int (*leave_smm)(struct x86_emulate_ctxt *ctxt); void (*triple_fault)(struct x86_emulate_ctxt *ctxt); int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr); }; @@ -292,7 +278,6 @@ enum x86emul_mode { /* These match some of the HF_* flags defined in kvm_host.h */ #define X86EMUL_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ #define X86EMUL_SMM_MASK (1 << 6) -#define X86EMUL_SMM_INSIDE_NMI_MASK (1 << 7) /* * fastop functions are declared as taking a never-defined fastop parameter, @@ -526,4 +511,35 @@ void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); +static inline ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) + nr &= NR_EMULATOR_GPRS - 1; + + if (!(ctxt->regs_valid & (1 << nr))) { + ctxt->regs_valid |= 1 << nr; + ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); + } + return ctxt->_regs[nr]; +} + +static inline ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt)) + nr &= NR_EMULATOR_GPRS - 1; + + BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS); + BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS); + + ctxt->regs_valid |= 1 << nr; + ctxt->regs_dirty |= 1 << nr; + return &ctxt->_regs[nr]; +} + +static inline ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) +{ + reg_read(ctxt, nr); + return reg_write(ctxt, nr); +} + #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index d7639d126e6c7ae255f6bce48634ebe1102a5b84..4efdb4a4d72c6084f67f00ce8ae06292f68532e6 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -42,6 +42,7 @@ #include "x86.h" #include "cpuid.h" #include "hyperv.h" +#include "smm.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -159,7 +160,6 @@ bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) && !(kvm_mwait_in_guest(vcpu->kvm) || kvm_can_post_timer_interrupt(vcpu)); } -EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) { @@ -1170,9 +1170,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, break; case APIC_DM_SMI: - result = 1; - kvm_make_request(KVM_REQ_SMI, vcpu); - kvm_vcpu_kick(vcpu); + if (!kvm_inject_smi(vcpu)) { + kvm_vcpu_kick(vcpu); + result = 1; + } break; case APIC_DM_NMI: @@ -1912,7 +1913,6 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) return vcpu->arch.apic->lapic_timer.hv_timer_in_use; } -EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); static void cancel_hv_timer(struct kvm_lapic *apic) { @@ -2430,7 +2430,6 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu) apic->isr_count = count_vectors(apic->regs + APIC_ISR); } } -EXPORT_SYMBOL_GPL(kvm_apic_update_apicv); void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) { @@ -2722,8 +2721,6 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR); __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32); } - } else { - kvm_lapic_xapic_id_updated(vcpu->arch.apic); } return 0; @@ -2759,6 +2756,9 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) } memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s)); + if (!apic_x2apic_mode(apic)) + kvm_lapic_xapic_id_updated(apic); + atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY); kvm_recalculate_apic_map(vcpu->kvm); kvm_apic_set_version(vcpu); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index a5ac4a5a517920ed8dff0ba1f14cde110606832d..28e3769066e215332197df09381ec67252a67a5d 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -7,7 +7,7 @@ #include #include "hyperv.h" -#include "kvm_cache_regs.h" +#include "smm.h" #define KVM_APIC_INIT 0 #define KVM_APIC_SIPI 1 diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index b6f96d47e596d1018987755a2f96a8a9305fda0c..835426254e768c1e7f50aab3f84867bd75906f56 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -22,6 +22,7 @@ #include "tdp_mmu.h" #include "x86.h" #include "kvm_cache_regs.h" +#include "smm.h" #include "kvm_emulate.h" #include "cpuid.h" #include "spte.h" @@ -802,15 +803,31 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_flush_remote_tlbs_with_address(kvm, gfn, 1); } -void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { - if (sp->lpage_disallowed) + /* + * If it's possible to replace the shadow page with an NX huge page, + * i.e. if the shadow page is the only thing currently preventing KVM + * from using a huge page, add the shadow page to the list of "to be + * zapped for NX recovery" pages. Note, the shadow page can already be + * on the list if KVM is reusing an existing shadow page, i.e. if KVM + * links a shadow page at multiple points. + */ + if (!list_empty(&sp->possible_nx_huge_page_link)) return; ++kvm->stat.nx_lpage_splits; - list_add_tail(&sp->lpage_disallowed_link, - &kvm->arch.lpage_disallowed_mmu_pages); - sp->lpage_disallowed = true; + list_add_tail(&sp->possible_nx_huge_page_link, + &kvm->arch.possible_nx_huge_pages); +} + +static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp, + bool nx_huge_page_possible) +{ + sp->nx_huge_page_disallowed = true; + + if (nx_huge_page_possible) + track_possible_nx_huge_page(kvm, sp); } static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) @@ -830,11 +847,20 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) kvm_mmu_gfn_allow_lpage(slot, gfn); } -void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) { + if (list_empty(&sp->possible_nx_huge_page_link)) + return; + --kvm->stat.nx_lpage_splits; - sp->lpage_disallowed = false; - list_del(&sp->lpage_disallowed_link); + list_del_init(&sp->possible_nx_huge_page_link); +} + +static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp) +{ + sp->nx_huge_page_disallowed = false; + + untrack_possible_nx_huge_page(kvm, sp); } static struct kvm_memory_slot * @@ -1645,7 +1671,7 @@ static int is_empty_shadow_page(u64 *spt) u64 *pos; u64 *end; - for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++) + for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++) if (is_shadow_present_pte(*pos)) { printk(KERN_ERR "%s: %p %llx\n", __func__, pos, *pos); @@ -1793,7 +1819,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp, continue; } - child = to_shadow_page(ent & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(ent); if (child->unsync_children) { if (mmu_pages_add(pvec, child, i)) @@ -1894,7 +1920,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp) if (sp->role.invalid) return true; - /* TDP MMU pages due not use the MMU generation. */ + /* TDP MMU pages do not use the MMU generation. */ return !sp->tdp_mmu_page && unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen); } @@ -2129,6 +2155,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm, set_page_private(virt_to_page(sp->spt), (unsigned long)sp); + INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); + /* * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages() * depends on valid pages being added to the head of the list. See @@ -2350,7 +2378,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep, * so we should update the spte at this point to get * a new sp with the correct access. */ - child = to_shadow_page(*sptep & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(*sptep); if (child->role.access == direct_access) return; @@ -2371,7 +2399,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp, if (is_last_spte(pte, sp->role.level)) { drop_spte(kvm, spte); } else { - child = to_shadow_page(pte & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(pte); drop_parent_pte(child, spte); /* @@ -2487,8 +2515,8 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm, zapped_root = !is_obsolete_sp(kvm, sp); } - if (sp->lpage_disallowed) - unaccount_huge_nx_page(kvm, sp); + if (sp->nx_huge_page_disallowed) + unaccount_nx_huge_page(kvm, sp); sp->role.invalid = 1; @@ -2811,7 +2839,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot, struct kvm_mmu_page *child; u64 pte = *sptep; - child = to_shadow_page(pte & SPTE_BASE_ADDR_MASK); + child = spte_to_child_sp(pte); drop_parent_pte(child, sptep); flush = true; } else if (pfn != spte_to_pfn(*sptep)) { @@ -3085,7 +3113,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ if (cur_level > PG_LEVEL_4K && cur_level == fault->goal_level && is_shadow_present_pte(spte) && - !is_large_pte(spte)) { + !is_large_pte(spte) && + spte_to_child_sp(spte)->nx_huge_page_disallowed) { /* * A small SPTE exists for this pfn, but FNAME(fetch) * and __direct_map would like to create a large PTE @@ -3127,9 +3156,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) continue; link_shadow_page(vcpu, it.sptep, sp); - if (fault->is_tdp && fault->huge_page_disallowed && - fault->req_level >= it.level) - account_huge_nx_page(vcpu->kvm, sp); + if (fault->huge_page_disallowed) + account_nx_huge_page(vcpu->kvm, sp, + fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) @@ -3149,8 +3178,13 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk); } -static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) +static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn) { + if (is_sigpending_pfn(pfn)) { + kvm_handle_signal_exit(vcpu); + return -EINTR; + } + /* * Do not cache the mmio info caused by writing the readonly gfn * into the spte otherwise read access on readonly gfn also can @@ -3172,7 +3206,7 @@ static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fau { /* The pfn is invalid, report the error! */ if (unlikely(is_error_pfn(fault->pfn))) - return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn); + return kvm_handle_error_pfn(vcpu, fault->gfn, fault->pfn); if (unlikely(!fault->slot)) { gva_t gva = fault->is_tdp ? 0 : fault->addr; @@ -3423,7 +3457,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, if (!VALID_PAGE(*root_hpa)) return; - sp = to_shadow_page(*root_hpa & SPTE_BASE_ADDR_MASK); + /* + * The "root" may be a special root, e.g. a PAE entry, treat it as a + * SPTE to ensure any non-PA bits are dropped. + */ + sp = spte_to_child_sp(*root_hpa); if (WARN_ON(!sp)) return; @@ -3908,8 +3946,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu) hpa_t root = vcpu->arch.mmu->pae_root[i]; if (IS_VALID_PAE_ROOT(root)) { - root &= SPTE_BASE_ADDR_MASK; - sp = to_shadow_page(root); + sp = spte_to_child_sp(root); mmu_sync_children(vcpu, sp, true); } } @@ -4170,7 +4207,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } async = false; - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async, + fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async, fault->write, &fault->map_writable, &fault->hva); if (!async) @@ -4187,7 +4224,12 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) } } - fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL, + /* + * Allow gup to bail on pending non-fatal signals when it's also allowed + * to wait for IO. Note, gup always bails if it is unable to quickly + * get a page and a fatal signal, i.e. SIGKILL, is pending. + */ + fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL, fault->write, &fault->map_writable, &fault->hva); return RET_PF_CONTINUE; @@ -5972,7 +6014,7 @@ int kvm_mmu_init_vm(struct kvm *kvm) INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); - INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages); spin_lock_init(&kvm->arch.mmu_unsync_pages_lock); r = kvm_mmu_init_tdp_mmu(kvm); @@ -6657,7 +6699,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) kvm_mmu_zap_all_fast(kvm); mutex_unlock(&kvm->slots_lock); - wake_up_process(kvm->arch.nx_lpage_recovery_thread); + wake_up_process(kvm->arch.nx_huge_page_recovery_thread); } mutex_unlock(&kvm_lock); } @@ -6789,7 +6831,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel mutex_lock(&kvm_lock); list_for_each_entry(kvm, &vm_list, vm_list) - wake_up_process(kvm->arch.nx_lpage_recovery_thread); + wake_up_process(kvm->arch.nx_huge_page_recovery_thread); mutex_unlock(&kvm_lock); } @@ -6797,9 +6839,10 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel return err; } -static void kvm_recover_nx_lpages(struct kvm *kvm) +static void kvm_recover_nx_huge_pages(struct kvm *kvm) { unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits; + struct kvm_memory_slot *slot; int rcu_idx; struct kvm_mmu_page *sp; unsigned int ratio; @@ -6820,24 +6863,55 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) ratio = READ_ONCE(nx_huge_pages_recovery_ratio); to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0; for ( ; to_zap; --to_zap) { - if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) + if (list_empty(&kvm->arch.possible_nx_huge_pages)) break; /* * We use a separate list instead of just using active_mmu_pages - * because the number of lpage_disallowed pages is expected to - * be relatively small compared to the total. + * because the number of shadow pages that be replaced with an + * NX huge page is expected to be relatively small compared to + * the total number of shadow pages. And because the TDP MMU + * doesn't use active_mmu_pages. */ - sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, + sp = list_first_entry(&kvm->arch.possible_nx_huge_pages, struct kvm_mmu_page, - lpage_disallowed_link); - WARN_ON_ONCE(!sp->lpage_disallowed); - if (is_tdp_mmu_page(sp)) { + possible_nx_huge_page_link); + WARN_ON_ONCE(!sp->nx_huge_page_disallowed); + WARN_ON_ONCE(!sp->role.direct); + + /* + * Unaccount and do not attempt to recover any NX Huge Pages + * that are being dirty tracked, as they would just be faulted + * back in as 4KiB pages. The NX Huge Pages in this slot will be + * recovered, along with all the other huge pages in the slot, + * when dirty logging is disabled. + * + * Since gfn_to_memslot() is relatively expensive, it helps to + * skip it if it the test cannot possibly return true. On the + * other hand, if any memslot has logging enabled, chances are + * good that all of them do, in which case unaccount_nx_huge_page() + * is much cheaper than zapping the page. + * + * If a memslot update is in progress, reading an incorrect value + * of kvm->nr_memslots_dirty_logging is not a problem: if it is + * becoming zero, gfn_to_memslot() will be done unnecessarily; if + * it is becoming nonzero, the page will be zapped unnecessarily. + * Either way, this only affects efficiency in racy situations, + * and not correctness. + */ + slot = NULL; + if (atomic_read(&kvm->nr_memslots_dirty_logging)) { + slot = gfn_to_memslot(kvm, sp->gfn); + WARN_ON_ONCE(!slot); + } + + if (slot && kvm_slot_dirty_track_enabled(slot)) + unaccount_nx_huge_page(kvm, sp); + else if (is_tdp_mmu_page(sp)) flush |= kvm_tdp_mmu_zap_sp(kvm, sp); - } else { + else kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); - WARN_ON_ONCE(sp->lpage_disallowed); - } + WARN_ON_ONCE(sp->nx_huge_page_disallowed); if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) { kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush); @@ -6857,7 +6931,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, rcu_idx); } -static long get_nx_lpage_recovery_timeout(u64 start_time) +static long get_nx_huge_page_recovery_timeout(u64 start_time) { bool enabled; uint period; @@ -6868,19 +6942,19 @@ static long get_nx_lpage_recovery_timeout(u64 start_time) : MAX_SCHEDULE_TIMEOUT; } -static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) +static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data) { u64 start_time; long remaining_time; while (true) { start_time = get_jiffies_64(); - remaining_time = get_nx_lpage_recovery_timeout(start_time); + remaining_time = get_nx_huge_page_recovery_timeout(start_time); set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop() && remaining_time > 0) { schedule_timeout(remaining_time); - remaining_time = get_nx_lpage_recovery_timeout(start_time); + remaining_time = get_nx_huge_page_recovery_timeout(start_time); set_current_state(TASK_INTERRUPTIBLE); } @@ -6889,7 +6963,7 @@ static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) if (kthread_should_stop()) return 0; - kvm_recover_nx_lpages(kvm); + kvm_recover_nx_huge_pages(kvm); } } @@ -6897,17 +6971,17 @@ int kvm_mmu_post_init_vm(struct kvm *kvm) { int err; - err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, + err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0, "kvm-nx-lpage-recovery", - &kvm->arch.nx_lpage_recovery_thread); + &kvm->arch.nx_huge_page_recovery_thread); if (!err) - kthread_unpark(kvm->arch.nx_lpage_recovery_thread); + kthread_unpark(kvm->arch.nx_huge_page_recovery_thread); return err; } void kvm_mmu_pre_destroy_vm(struct kvm *kvm) { - if (kvm->arch.nx_lpage_recovery_thread) - kthread_stop(kvm->arch.nx_lpage_recovery_thread); + if (kvm->arch.nx_huge_page_recovery_thread) + kthread_stop(kvm->arch.nx_huge_page_recovery_thread); } diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index 582def531d4d9e12d2c281973d63f034a7f0f222..dbaf6755c5a71c009d99c797747f441190d215cf 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -57,7 +57,13 @@ struct kvm_mmu_page { bool tdp_mmu_page; bool unsync; u8 mmu_valid_gen; - bool lpage_disallowed; /* Can't be replaced by an equiv large page */ + + /* + * The shadow page can't be replaced by an equivalent huge page + * because it is being used to map an executable page in the guest + * and the NX huge page mitigation is enabled. + */ + bool nx_huge_page_disallowed; /* * The following two entries are used to key the shadow page in the @@ -100,7 +106,14 @@ struct kvm_mmu_page { }; }; - struct list_head lpage_disallowed_link; + /* + * Tracks shadow pages that, if zapped, would allow KVM to create an NX + * huge page. A shadow page will have nx_huge_page_disallowed set but + * not be on the list if a huge page is disallowed for other reasons, + * e.g. because KVM is shadowing a PTE at the same gfn, the memslot + * isn't properly aligned, etc... + */ + struct list_head possible_nx_huge_page_link; #ifdef CONFIG_X86_32 /* * Used out of the mmu-lock to avoid reading spte values while an @@ -120,18 +133,6 @@ struct kvm_mmu_page { extern struct kmem_cache *mmu_page_header_cache; -static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page) -{ - struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT); - - return (struct kvm_mmu_page *)page_private(page); -} - -static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) -{ - return to_shadow_page(__pa(sptep)); -} - static inline int kvm_mmu_role_as_id(union kvm_mmu_page_role role) { return role.smm ? 1 : 0; @@ -315,7 +316,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_ void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc); -void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp); -void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp); +void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); +void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp); #endif /* __KVM_X86_MMU_INTERNAL_H */ diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 5ab5f94dcb6fdb3c95140dbc31bee11595665832..0f645507205579032d5dc11bdee646e798f16aae 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -713,9 +713,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault, continue; link_shadow_page(vcpu, it.sptep, sp); - if (fault->huge_page_disallowed && - fault->req_level >= it.level) - account_huge_nx_page(vcpu->kvm, sp); + if (fault->huge_page_disallowed) + account_nx_huge_page(vcpu->kvm, sp, + fault->req_level >= it.level); } if (WARN_ON_ONCE(it.level != fault->goal_level)) diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c index 2e08b2a45361299fe071de8f766533b2d8dcecb6..c0fd7e049b4e4b5406e0dbbfbcfa79b3078bd774 100644 --- a/arch/x86/kvm/mmu/spte.c +++ b/arch/x86/kvm/mmu/spte.c @@ -161,6 +161,18 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (!prefetch) spte |= spte_shadow_accessed_mask(spte); + /* + * For simplicity, enforce the NX huge page mitigation even if not + * strictly necessary. KVM could ignore the mitigation if paging is + * disabled in the guest, as the guest doesn't have an page tables to + * abuse. But to safely ignore the mitigation, KVM would have to + * ensure a new MMU is loaded (or all shadow pages zapped) when CR0.PG + * is toggled on, and that's a net negative for performance when TDP is + * enabled. When TDP is disabled, KVM will always switch to a new MMU + * when CR0.PG is toggled, but leveraging that to ignore the mitigation + * would tie make_spte() further to vCPU/MMU state, and add complexity + * just to optimize a mode that is anything but performance critical. + */ if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) && is_nx_huge_page_enabled(vcpu->kvm)) { pte_access &= ~ACC_EXEC_MASK; diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h index 7670c13ce251b6e08698eef477317b932e5bfdb7..1f03701b943a1cac91fe7dac6a5f3514c5b9d6dc 100644 --- a/arch/x86/kvm/mmu/spte.h +++ b/arch/x86/kvm/mmu/spte.h @@ -188,7 +188,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask; * should not modify the SPTE. * * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on - * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF + * both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF * vulnerability. Use only low bits to avoid 64-bit immediates. * * Only used by the TDP MMU. @@ -219,6 +219,23 @@ static inline int spte_index(u64 *sptep) */ extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask; +static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page) +{ + struct page *page = pfn_to_page((shadow_page) >> PAGE_SHIFT); + + return (struct kvm_mmu_page *)page_private(page); +} + +static inline struct kvm_mmu_page *spte_to_child_sp(u64 spte) +{ + return to_shadow_page(spte & SPTE_BASE_ADDR_MASK); +} + +static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) +{ + return to_shadow_page(__pa(sptep)); +} + static inline bool is_mmio_spte(u64 spte) { return (spte & shadow_mmio_mask) == shadow_mmio_value && diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 672f0432d7777fca9414da2bb6fc6cc13d28220b..771210ce518112f5d8446d9204e4d0eeec9ee241 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm) kvm->arch.tdp_mmu_enabled = true; INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); kvm->arch.tdp_mmu_zap_wq = wq; return 1; } @@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) /* Also waits for any queued work items. */ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); - WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); + WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -284,6 +283,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu) static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep, gfn_t gfn, union kvm_mmu_page_role role) { + INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); + set_page_private(virt_to_page(sp->spt), (unsigned long)sp); sp->role = role; @@ -375,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, +1); + atomic64_inc(&kvm->arch.tdp_mmu_pages); } static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, -1); + atomic64_dec(&kvm->arch.tdp_mmu_pages); } /** @@ -395,14 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, bool shared) { tdp_unaccount_mmu_page(kvm, sp); + + if (!sp->nx_huge_page_disallowed) + return; + if (shared) spin_lock(&kvm->arch.tdp_mmu_pages_lock); else lockdep_assert_held_write(&kvm->mmu_lock); - list_del(&sp->link); - if (sp->lpage_disallowed) - unaccount_huge_nx_page(kvm, sp); + sp->nx_huge_page_disallowed = false; + untrack_possible_nx_huge_page(kvm, sp); if (shared) spin_unlock(&kvm->arch.tdp_mmu_pages_lock); @@ -1116,16 +1122,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, * @kvm: kvm instance * @iter: a tdp_iter instance currently on the SPTE that should be set * @sp: The new TDP page table to install. - * @account_nx: True if this page table is being installed to split a - * non-executable huge page. * @shared: This operation is running under the MMU lock in read mode. * * Returns: 0 if the new page table was installed. Non-0 if the page table * could not be installed (e.g. the atomic compare-exchange failed). */ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, - struct kvm_mmu_page *sp, bool account_nx, - bool shared) + struct kvm_mmu_page *sp, bool shared) { u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled()); int ret = 0; @@ -1138,16 +1141,14 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, tdp_mmu_set_spte(kvm, iter, spte); } - spin_lock(&kvm->arch.tdp_mmu_pages_lock); - list_add(&sp->link, &kvm->arch.tdp_mmu_pages); - if (account_nx) - account_huge_nx_page(kvm, sp); - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); tdp_account_mmu_page(kvm, sp); return 0; } +static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, + struct kvm_mmu_page *sp, bool shared); + /* * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing * page tables and SPTEs to translate the faulting guest physical address. @@ -1155,9 +1156,10 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_mmu *mmu = vcpu->arch.mmu; + struct kvm *kvm = vcpu->kvm; struct tdp_iter iter; struct kvm_mmu_page *sp; - int ret; + int ret = RET_PF_RETRY; kvm_mmu_hugepage_adjust(vcpu, fault); @@ -1166,6 +1168,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) rcu_read_lock(); tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) { + int r; + if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, iter.old_spte, iter.level); @@ -1173,57 +1177,52 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) break; /* - * If there is an SPTE mapping a large page at a higher level - * than the target, that SPTE must be cleared and replaced - * with a non-leaf SPTE. + * If SPTE has been frozen by another thread, just give up and + * retry, avoiding unnecessary page table allocation and free. */ - if (is_shadow_present_pte(iter.old_spte) && - is_large_pte(iter.old_spte)) { - if (tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter)) - break; + if (is_removed_spte(iter.old_spte)) + goto retry; - /* - * The iter must explicitly re-read the spte here - * because the new value informs the !present - * path below. - */ - iter.old_spte = kvm_tdp_mmu_read_spte(iter.sptep); - } + /* Step down into the lower level page table if it exists. */ + if (is_shadow_present_pte(iter.old_spte) && + !is_large_pte(iter.old_spte)) + continue; - if (!is_shadow_present_pte(iter.old_spte)) { - bool account_nx = fault->huge_page_disallowed && - fault->req_level >= iter.level; + /* + * The SPTE is either non-present or points to a huge page that + * needs to be split. + */ + sp = tdp_mmu_alloc_sp(vcpu); + tdp_mmu_init_child_sp(sp, &iter); - /* - * If SPTE has been frozen by another thread, just - * give up and retry, avoiding unnecessary page table - * allocation and free. - */ - if (is_removed_spte(iter.old_spte)) - break; + sp->nx_huge_page_disallowed = fault->huge_page_disallowed; - sp = tdp_mmu_alloc_sp(vcpu); - tdp_mmu_init_child_sp(sp, &iter); + if (is_shadow_present_pte(iter.old_spte)) + r = tdp_mmu_split_huge_page(kvm, &iter, sp, true); + else + r = tdp_mmu_link_sp(kvm, &iter, sp, true); - if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) { - tdp_mmu_free_sp(sp); - break; - } + /* + * Also force the guest to retry the access if the upper level SPTEs + * aren't in place. + */ + if (r) { + tdp_mmu_free_sp(sp); + goto retry; } - } - /* - * Force the guest to retry the access if the upper level SPTEs aren't - * in place, or if the target leaf SPTE is frozen by another CPU. - */ - if (iter.level != fault->goal_level || is_removed_spte(iter.old_spte)) { - rcu_read_unlock(); - return RET_PF_RETRY; + if (fault->huge_page_disallowed && + fault->req_level >= iter.level) { + spin_lock(&kvm->arch.tdp_mmu_pages_lock); + track_possible_nx_huge_page(kvm, sp); + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + } } ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter); - rcu_read_unlock(); +retry: + rcu_read_unlock(); return ret; } @@ -1472,6 +1471,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm, return sp; } +/* Note, the caller is responsible for initializing @sp. */ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, struct kvm_mmu_page *sp, bool shared) { @@ -1479,8 +1479,6 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, const int level = iter->level; int ret, i; - tdp_mmu_init_child_sp(sp, iter); - /* * No need for atomics when writing to sp->spt since the page table has * not been linked in yet and thus is not reachable from any other CPU. @@ -1496,7 +1494,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, * correctness standpoint since the translation will be the same either * way. */ - ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared); + ret = tdp_mmu_link_sp(kvm, iter, sp, shared); if (ret) goto out; @@ -1556,6 +1554,8 @@ retry: continue; } + tdp_mmu_init_child_sp(sp, &iter); + if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared)) goto retry; diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h index c163f7cc23ca54aa93d466447d9aaf7ab176bf41..d3714200b932afacd7a3e5273e6361c28dd86f55 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.h +++ b/arch/x86/kvm/mmu/tdp_mmu.h @@ -5,6 +5,8 @@ #include +#include "spte.h" + hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu); __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index de1fd73697365c18e27157205c0c7a39e4650127..684393c2210523d917675f6709fa7ab1dd5fd8ed 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -101,10 +101,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) struct kvm_pmu *pmu = pmc_to_pmu(pmc); bool skip_pmi = false; - /* Ignore counters that have been reprogrammed already. */ - if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi)) - return; - if (pmc->perf_event && pmc->perf_event->attr.precise_ip) { if (!in_pmi) { /* @@ -122,7 +118,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi) } else { __set_bit(pmc->idx, (unsigned long *)&pmu->global_status); } - kvm_make_request(KVM_REQ_PMU, pmc->vcpu); if (!pmc->intr || skip_pmi) return; @@ -147,12 +142,22 @@ static void kvm_perf_overflow(struct perf_event *perf_event, { struct kvm_pmc *pmc = perf_event->overflow_handler_context; + /* + * Ignore overflow events for counters that are scheduled to be + * reprogrammed, e.g. if a PMI for the previous event races with KVM's + * handling of a related guest WRMSR. + */ + if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi)) + return; + __kvm_perf_overflow(pmc, true); + + kvm_make_request(KVM_REQ_PMU, pmc->vcpu); } -static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, - u64 config, bool exclude_user, - bool exclude_kernel, bool intr) +static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config, + bool exclude_user, bool exclude_kernel, + bool intr) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); struct perf_event *event; @@ -204,14 +209,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, if (IS_ERR(event)) { pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n", PTR_ERR(event), pmc->idx); - return; + return PTR_ERR(event); } pmc->perf_event = event; pmc_to_pmu(pmc)->event_count++; - clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); pmc->is_paused = false; pmc->intr = intr || pebs; + return 0; } static void pmc_pause_counter(struct kvm_pmc *pmc) @@ -245,7 +250,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc) perf_event_enable(pmc->perf_event); pmc->is_paused = false; - clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); return true; } @@ -293,7 +297,7 @@ out: return allow_event; } -void reprogram_counter(struct kvm_pmc *pmc) +static void reprogram_counter(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); u64 eventsel = pmc->eventsel; @@ -303,10 +307,13 @@ void reprogram_counter(struct kvm_pmc *pmc) pmc_pause_counter(pmc); if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc)) - return; + goto reprogram_complete; if (!check_pmu_event_filter(pmc)) - return; + goto reprogram_complete; + + if (pmc->counter < pmc->prev_counter) + __kvm_perf_overflow(pmc, false); if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) printk_once("kvm pmu: pin control bit is ignored\n"); @@ -324,18 +331,29 @@ void reprogram_counter(struct kvm_pmc *pmc) } if (pmc->current_config == new_config && pmc_resume_counter(pmc)) - return; + goto reprogram_complete; pmc_release_perf_event(pmc); pmc->current_config = new_config; - pmc_reprogram_counter(pmc, PERF_TYPE_RAW, - (eventsel & pmu->raw_event_mask), - !(eventsel & ARCH_PERFMON_EVENTSEL_USR), - !(eventsel & ARCH_PERFMON_EVENTSEL_OS), - eventsel & ARCH_PERFMON_EVENTSEL_INT); + + /* + * If reprogramming fails, e.g. due to contention, leave the counter's + * regprogram bit set, i.e. opportunistically try again on the next PMU + * refresh. Don't make a new request as doing so can stall the guest + * if reprogramming repeatedly fails. + */ + if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW, + (eventsel & pmu->raw_event_mask), + !(eventsel & ARCH_PERFMON_EVENTSEL_USR), + !(eventsel & ARCH_PERFMON_EVENTSEL_OS), + eventsel & ARCH_PERFMON_EVENTSEL_INT)) + return; + +reprogram_complete: + clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi); + pmc->prev_counter = 0; } -EXPORT_SYMBOL_GPL(reprogram_counter); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) { @@ -345,10 +363,11 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu) for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) { struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit); - if (unlikely(!pmc || !pmc->perf_event)) { + if (unlikely(!pmc)) { clear_bit(bit, pmu->reprogram_pmi); continue; } + reprogram_counter(pmc); } @@ -522,14 +541,9 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu) static void kvm_pmu_incr_counter(struct kvm_pmc *pmc) { - u64 prev_count; - - prev_count = pmc->counter; + pmc->prev_counter = pmc->counter; pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc); - - reprogram_counter(pmc); - if (pmc->counter < prev_count) - __kvm_perf_overflow(pmc, false); + kvm_pmu_request_counter_reprogam(pmc); } static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, @@ -542,12 +556,15 @@ static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, static inline bool cpl_is_matched(struct kvm_pmc *pmc) { bool select_os, select_user; - u64 config = pmc->current_config; + u64 config; if (pmc_is_gp(pmc)) { + config = pmc->eventsel; select_os = config & ARCH_PERFMON_EVENTSEL_OS; select_user = config & ARCH_PERFMON_EVENTSEL_USR; } else { + config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl, + pmc->idx - INTEL_PMC_IDX_FIXED); select_os = config & 0x1; select_user = config & 0x2; } @@ -577,6 +594,8 @@ EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event); int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) { struct kvm_pmu_event_filter tmp, *filter; + struct kvm_vcpu *vcpu; + unsigned long i; size_t size; int r; @@ -613,9 +632,18 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) mutex_lock(&kvm->lock); filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter, mutex_is_locked(&kvm->lock)); + synchronize_srcu_expedited(&kvm->srcu); + + BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) > + sizeof(((struct kvm_pmu *)0)->__reprogram_pmi)); + + kvm_for_each_vcpu(i, vcpu, kvm) + atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull); + + kvm_make_all_cpus_request(kvm, KVM_REQ_PMU); + mutex_unlock(&kvm->lock); - synchronize_srcu_expedited(&kvm->srcu); r = 0; cleanup: kfree(filter); diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 5cc5721f260bfea18fd8fd754f6aded0a9334309..85ff3c0588bac01e0dee3edd607c33d4aca347ba 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -183,7 +183,11 @@ static inline void kvm_init_pmu_capability(void) KVM_PMC_MAX_FIXED); } -void reprogram_counter(struct kvm_pmc *pmc); +static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc) +{ + set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); + kvm_make_request(KVM_REQ_PMU, pmc->vcpu); +} void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu); void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index 4e5b8444f161c3b01b30c2b58d9aa695f0000b4e..042d0aca3c92b238602d93d0158e043bcd2b4447 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -7,17 +7,30 @@ #include /* - * Hardware-defined CPUID leafs that are scattered in the kernel, but need to - * be directly used by KVM. Note, these word values conflict with the kernel's - * "bug" caps, but KVM doesn't use those. + * Hardware-defined CPUID leafs that are either scattered by the kernel or are + * unknown to the kernel, but need to be directly used by KVM. Note, these + * word values conflict with the kernel's "bug" caps, but KVM doesn't use those. */ enum kvm_only_cpuid_leafs { CPUID_12_EAX = NCAPINTS, + CPUID_7_1_EDX, NR_KVM_CPU_CAPS, NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS, }; +/* + * Define a KVM-only feature flag. + * + * For features that are scattered by cpufeatures.h, __feature_translate() also + * needs to be updated to translate the kernel-defined feature into the + * KVM-defined feature. + * + * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h, + * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so + * that X86_FEATURE_* can be used in KVM. No __feature_translate() handling is + * needed in this case. + */ #define KVM_X86_FEATURE(w, f) ((w)*32 + (f)) /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */ @@ -25,6 +38,11 @@ enum kvm_only_cpuid_leafs { #define KVM_X86_FEATURE_SGX2 KVM_X86_FEATURE(CPUID_12_EAX, 1) #define KVM_X86_FEATURE_SGX_EDECCSSA KVM_X86_FEATURE(CPUID_12_EAX, 11) +/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */ +#define X86_FEATURE_AVX_VNNI_INT8 KVM_X86_FEATURE(CPUID_7_1_EDX, 4) +#define X86_FEATURE_AVX_NE_CONVERT KVM_X86_FEATURE(CPUID_7_1_EDX, 5) +#define X86_FEATURE_PREFETCHITI KVM_X86_FEATURE(CPUID_7_1_EDX, 14) + struct cpuid_reg { u32 function; u32 index; @@ -49,6 +67,7 @@ static const struct cpuid_reg reverse_cpuid[] = { [CPUID_7_1_EAX] = { 7, 1, CPUID_EAX}, [CPUID_12_EAX] = {0x00000012, 0, CPUID_EAX}, [CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX}, + [CPUID_7_1_EDX] = { 7, 1, CPUID_EDX}, }; /* diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c new file mode 100644 index 0000000000000000000000000000000000000000..a9c1c2af8d94c21406cb18916bfea52c0f8fc647 --- /dev/null +++ b/arch/x86/kvm/smm.c @@ -0,0 +1,649 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include "x86.h" +#include "kvm_cache_regs.h" +#include "kvm_emulate.h" +#include "smm.h" +#include "cpuid.h" +#include "trace.h" + +#define CHECK_SMRAM32_OFFSET(field, offset) \ + ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00) + +#define CHECK_SMRAM64_OFFSET(field, offset) \ + ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00) + +static void check_smram_offsets(void) +{ + /* 32 bit SMRAM image */ + CHECK_SMRAM32_OFFSET(reserved1, 0xFE00); + CHECK_SMRAM32_OFFSET(smbase, 0xFEF8); + CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC); + CHECK_SMRAM32_OFFSET(io_inst_restart, 0xFF00); + CHECK_SMRAM32_OFFSET(auto_hlt_restart, 0xFF02); + CHECK_SMRAM32_OFFSET(io_restart_rdi, 0xFF04); + CHECK_SMRAM32_OFFSET(io_restart_rcx, 0xFF08); + CHECK_SMRAM32_OFFSET(io_restart_rsi, 0xFF0C); + CHECK_SMRAM32_OFFSET(io_restart_rip, 0xFF10); + CHECK_SMRAM32_OFFSET(cr4, 0xFF14); + CHECK_SMRAM32_OFFSET(reserved2, 0xFF18); + CHECK_SMRAM32_OFFSET(int_shadow, 0xFF1A); + CHECK_SMRAM32_OFFSET(reserved3, 0xFF1B); + CHECK_SMRAM32_OFFSET(ds, 0xFF2C); + CHECK_SMRAM32_OFFSET(fs, 0xFF38); + CHECK_SMRAM32_OFFSET(gs, 0xFF44); + CHECK_SMRAM32_OFFSET(idtr, 0xFF50); + CHECK_SMRAM32_OFFSET(tr, 0xFF5C); + CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C); + CHECK_SMRAM32_OFFSET(ldtr, 0xFF78); + CHECK_SMRAM32_OFFSET(es, 0xFF84); + CHECK_SMRAM32_OFFSET(cs, 0xFF90); + CHECK_SMRAM32_OFFSET(ss, 0xFF9C); + CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8); + CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC); + CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0); + CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4); + CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8); + CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC); + CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0); + CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4); + CHECK_SMRAM32_OFFSET(dr7, 0xFFC8); + CHECK_SMRAM32_OFFSET(dr6, 0xFFCC); + CHECK_SMRAM32_OFFSET(gprs, 0xFFD0); + CHECK_SMRAM32_OFFSET(eip, 0xFFF0); + CHECK_SMRAM32_OFFSET(eflags, 0xFFF4); + CHECK_SMRAM32_OFFSET(cr3, 0xFFF8); + CHECK_SMRAM32_OFFSET(cr0, 0xFFFC); + + /* 64 bit SMRAM image */ + CHECK_SMRAM64_OFFSET(es, 0xFE00); + CHECK_SMRAM64_OFFSET(cs, 0xFE10); + CHECK_SMRAM64_OFFSET(ss, 0xFE20); + CHECK_SMRAM64_OFFSET(ds, 0xFE30); + CHECK_SMRAM64_OFFSET(fs, 0xFE40); + CHECK_SMRAM64_OFFSET(gs, 0xFE50); + CHECK_SMRAM64_OFFSET(gdtr, 0xFE60); + CHECK_SMRAM64_OFFSET(ldtr, 0xFE70); + CHECK_SMRAM64_OFFSET(idtr, 0xFE80); + CHECK_SMRAM64_OFFSET(tr, 0xFE90); + CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0); + CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8); + CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0); + CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8); + CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0); + CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4); + CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8); + CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9); + CHECK_SMRAM64_OFFSET(amd_nmi_mask, 0xFECA); + CHECK_SMRAM64_OFFSET(int_shadow, 0xFECB); + CHECK_SMRAM64_OFFSET(reserved2, 0xFECC); + CHECK_SMRAM64_OFFSET(efer, 0xFED0); + CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8); + CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0); + CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8); + CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0); + CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC); + CHECK_SMRAM64_OFFSET(smbase, 0xFF00); + CHECK_SMRAM64_OFFSET(reserved4, 0xFF04); + CHECK_SMRAM64_OFFSET(ssp, 0xFF18); + CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20); + CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28); + CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30); + CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38); + CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40); + CHECK_SMRAM64_OFFSET(cr4, 0xFF48); + CHECK_SMRAM64_OFFSET(cr3, 0xFF50); + CHECK_SMRAM64_OFFSET(cr0, 0xFF58); + CHECK_SMRAM64_OFFSET(dr7, 0xFF60); + CHECK_SMRAM64_OFFSET(dr6, 0xFF68); + CHECK_SMRAM64_OFFSET(rflags, 0xFF70); + CHECK_SMRAM64_OFFSET(rip, 0xFF78); + CHECK_SMRAM64_OFFSET(gprs, 0xFF80); + + BUILD_BUG_ON(sizeof(union kvm_smram) != 512); +} + +#undef CHECK_SMRAM64_OFFSET +#undef CHECK_SMRAM32_OFFSET + + +void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) +{ + BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); + + trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm); + + if (entering_smm) { + vcpu->arch.hflags |= HF_SMM_MASK; + } else { + vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK); + + /* Process a latched INIT or SMI, if any. */ + kvm_make_request(KVM_REQ_EVENT, vcpu); + + /* + * Even if KVM_SET_SREGS2 loaded PDPTRs out of band, + * on SMM exit we still need to reload them from + * guest memory + */ + vcpu->arch.pdptrs_from_userspace = false; + } + + kvm_mmu_reset_context(vcpu); +} + +void process_smi(struct kvm_vcpu *vcpu) +{ + vcpu->arch.smi_pending = true; + kvm_make_request(KVM_REQ_EVENT, vcpu); +} + +static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) +{ + u32 flags = 0; + flags |= seg->g << 23; + flags |= seg->db << 22; + flags |= seg->l << 21; + flags |= seg->avl << 20; + flags |= seg->present << 15; + flags |= seg->dpl << 13; + flags |= seg->s << 12; + flags |= seg->type << 8; + return flags; +} + +static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, + struct kvm_smm_seg_state_32 *state, + u32 *selector, int n) +{ + struct kvm_segment seg; + + kvm_get_segment(vcpu, &seg, n); + *selector = seg.selector; + state->base = seg.base; + state->limit = seg.limit; + state->flags = enter_smm_get_segment_flags(&seg); +} + +#ifdef CONFIG_X86_64 +static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, + struct kvm_smm_seg_state_64 *state, + int n) +{ + struct kvm_segment seg; + + kvm_get_segment(vcpu, &seg, n); + state->selector = seg.selector; + state->attributes = enter_smm_get_segment_flags(&seg) >> 8; + state->limit = seg.limit; + state->base = seg.base; +} +#endif + +static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, + struct kvm_smram_state_32 *smram) +{ + struct desc_ptr dt; + unsigned long val; + int i; + + smram->cr0 = kvm_read_cr0(vcpu); + smram->cr3 = kvm_read_cr3(vcpu); + smram->eflags = kvm_get_rflags(vcpu); + smram->eip = kvm_rip_read(vcpu); + + for (i = 0; i < 8; i++) + smram->gprs[i] = kvm_register_read_raw(vcpu, i); + + kvm_get_dr(vcpu, 6, &val); + smram->dr6 = (u32)val; + kvm_get_dr(vcpu, 7, &val); + smram->dr7 = (u32)val; + + enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR); + enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR); + + static_call(kvm_x86_get_gdt)(vcpu, &dt); + smram->gdtr.base = dt.address; + smram->gdtr.limit = dt.size; + + static_call(kvm_x86_get_idt)(vcpu, &dt); + smram->idtr.base = dt.address; + smram->idtr.limit = dt.size; + + enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES); + enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS); + enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS); + + enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS); + enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS); + enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS); + + smram->cr4 = kvm_read_cr4(vcpu); + smram->smm_revision = 0x00020000; + smram->smbase = vcpu->arch.smbase; + + smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); +} + +#ifdef CONFIG_X86_64 +static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, + struct kvm_smram_state_64 *smram) +{ + struct desc_ptr dt; + unsigned long val; + int i; + + for (i = 0; i < 16; i++) + smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i); + + smram->rip = kvm_rip_read(vcpu); + smram->rflags = kvm_get_rflags(vcpu); + + + kvm_get_dr(vcpu, 6, &val); + smram->dr6 = val; + kvm_get_dr(vcpu, 7, &val); + smram->dr7 = val; + + smram->cr0 = kvm_read_cr0(vcpu); + smram->cr3 = kvm_read_cr3(vcpu); + smram->cr4 = kvm_read_cr4(vcpu); + + smram->smbase = vcpu->arch.smbase; + smram->smm_revison = 0x00020064; + + smram->efer = vcpu->arch.efer; + + enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR); + + static_call(kvm_x86_get_idt)(vcpu, &dt); + smram->idtr.limit = dt.size; + smram->idtr.base = dt.address; + + enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR); + + static_call(kvm_x86_get_gdt)(vcpu, &dt); + smram->gdtr.limit = dt.size; + smram->gdtr.base = dt.address; + + enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES); + enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS); + enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS); + enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS); + enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS); + enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); + + smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); +} +#endif + +void enter_smm(struct kvm_vcpu *vcpu) +{ + struct kvm_segment cs, ds; + struct desc_ptr dt; + unsigned long cr0; + union kvm_smram smram; + + check_smram_offsets(); + + memset(smram.bytes, 0, sizeof(smram.bytes)); + +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + enter_smm_save_state_64(vcpu, &smram.smram64); + else +#endif + enter_smm_save_state_32(vcpu, &smram.smram32); + + /* + * Give enter_smm() a chance to make ISA-specific changes to the vCPU + * state (e.g. leave guest mode) after we've saved the state into the + * SMM state-save area. + * + * Kill the VM in the unlikely case of failure, because the VM + * can be in undefined state in this case. + */ + if (static_call(kvm_x86_enter_smm)(vcpu, &smram)) + goto error; + + kvm_smm_changed(vcpu, true); + + if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram))) + goto error; + + if (static_call(kvm_x86_get_nmi_mask)(vcpu)) + vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; + else + static_call(kvm_x86_set_nmi_mask)(vcpu, true); + + kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); + kvm_rip_write(vcpu, 0x8000); + + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); + + cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); + static_call(kvm_x86_set_cr0)(vcpu, cr0); + vcpu->arch.cr0 = cr0; + + static_call(kvm_x86_set_cr4)(vcpu, 0); + + /* Undocumented: IDT limit is set to zero on entry to SMM. */ + dt.address = dt.size = 0; + static_call(kvm_x86_set_idt)(vcpu, &dt); + + if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1))) + goto error; + + cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; + cs.base = vcpu->arch.smbase; + + ds.selector = 0; + ds.base = 0; + + cs.limit = ds.limit = 0xffffffff; + cs.type = ds.type = 0x3; + cs.dpl = ds.dpl = 0; + cs.db = ds.db = 0; + cs.s = ds.s = 1; + cs.l = ds.l = 0; + cs.g = ds.g = 1; + cs.avl = ds.avl = 0; + cs.present = ds.present = 1; + cs.unusable = ds.unusable = 0; + cs.padding = ds.padding = 0; + + kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_DS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_ES); + kvm_set_segment(vcpu, &ds, VCPU_SREG_FS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); + kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); + +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + if (static_call(kvm_x86_set_efer)(vcpu, 0)) + goto error; +#endif + + kvm_update_cpuid_runtime(vcpu); + kvm_mmu_reset_context(vcpu); + return; +error: + kvm_vm_dead(vcpu->kvm); +} + +static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags) +{ + desc->g = (flags >> 23) & 1; + desc->db = (flags >> 22) & 1; + desc->l = (flags >> 21) & 1; + desc->avl = (flags >> 20) & 1; + desc->present = (flags >> 15) & 1; + desc->dpl = (flags >> 13) & 3; + desc->s = (flags >> 12) & 1; + desc->type = (flags >> 8) & 15; + + desc->unusable = !desc->present; + desc->padding = 0; +} + +static int rsm_load_seg_32(struct kvm_vcpu *vcpu, + const struct kvm_smm_seg_state_32 *state, + u16 selector, int n) +{ + struct kvm_segment desc; + + desc.selector = selector; + desc.base = state->base; + desc.limit = state->limit; + rsm_set_desc_flags(&desc, state->flags); + kvm_set_segment(vcpu, &desc, n); + return X86EMUL_CONTINUE; +} + +#ifdef CONFIG_X86_64 + +static int rsm_load_seg_64(struct kvm_vcpu *vcpu, + const struct kvm_smm_seg_state_64 *state, + int n) +{ + struct kvm_segment desc; + + desc.selector = state->selector; + rsm_set_desc_flags(&desc, state->attributes << 8); + desc.limit = state->limit; + desc.base = state->base; + kvm_set_segment(vcpu, &desc, n); + return X86EMUL_CONTINUE; +} +#endif + +static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu, + u64 cr0, u64 cr3, u64 cr4) +{ + int bad; + u64 pcid; + + /* In order to later set CR4.PCIDE, CR3[11:0] must be zero. */ + pcid = 0; + if (cr4 & X86_CR4_PCIDE) { + pcid = cr3 & 0xfff; + cr3 &= ~0xfff; + } + + bad = kvm_set_cr3(vcpu, cr3); + if (bad) + return X86EMUL_UNHANDLEABLE; + + /* + * First enable PAE, long mode needs it before CR0.PG = 1 is set. + * Then enable protected mode. However, PCID cannot be enabled + * if EFER.LMA=0, so set it separately. + */ + bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE); + if (bad) + return X86EMUL_UNHANDLEABLE; + + bad = kvm_set_cr0(vcpu, cr0); + if (bad) + return X86EMUL_UNHANDLEABLE; + + if (cr4 & X86_CR4_PCIDE) { + bad = kvm_set_cr4(vcpu, cr4); + if (bad) + return X86EMUL_UNHANDLEABLE; + if (pcid) { + bad = kvm_set_cr3(vcpu, cr3 | pcid); + if (bad) + return X86EMUL_UNHANDLEABLE; + } + + } + + return X86EMUL_CONTINUE; +} + +static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, + const struct kvm_smram_state_32 *smstate) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + struct desc_ptr dt; + int i, r; + + ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED; + ctxt->_eip = smstate->eip; + + for (i = 0; i < 8; i++) + *reg_write(ctxt, i) = smstate->gprs[i]; + + if (kvm_set_dr(vcpu, 6, smstate->dr6)) + return X86EMUL_UNHANDLEABLE; + if (kvm_set_dr(vcpu, 7, smstate->dr7)) + return X86EMUL_UNHANDLEABLE; + + rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR); + rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR); + + dt.address = smstate->gdtr.base; + dt.size = smstate->gdtr.limit; + static_call(kvm_x86_set_gdt)(vcpu, &dt); + + dt.address = smstate->idtr.base; + dt.size = smstate->idtr.limit; + static_call(kvm_x86_set_idt)(vcpu, &dt); + + rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES); + rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS); + rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS); + + rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS); + rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS); + rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS); + + vcpu->arch.smbase = smstate->smbase; + + r = rsm_enter_protected_mode(vcpu, smstate->cr0, + smstate->cr3, smstate->cr4); + + if (r != X86EMUL_CONTINUE) + return r; + + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); + ctxt->interruptibility = (u8)smstate->int_shadow; + + return r; +} + +#ifdef CONFIG_X86_64 +static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, + const struct kvm_smram_state_64 *smstate) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + struct desc_ptr dt; + int i, r; + + for (i = 0; i < 16; i++) + *reg_write(ctxt, i) = smstate->gprs[15 - i]; + + ctxt->_eip = smstate->rip; + ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED; + + if (kvm_set_dr(vcpu, 6, smstate->dr6)) + return X86EMUL_UNHANDLEABLE; + if (kvm_set_dr(vcpu, 7, smstate->dr7)) + return X86EMUL_UNHANDLEABLE; + + vcpu->arch.smbase = smstate->smbase; + + if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA)) + return X86EMUL_UNHANDLEABLE; + + rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR); + + dt.size = smstate->idtr.limit; + dt.address = smstate->idtr.base; + static_call(kvm_x86_set_idt)(vcpu, &dt); + + rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR); + + dt.size = smstate->gdtr.limit; + dt.address = smstate->gdtr.base; + static_call(kvm_x86_set_gdt)(vcpu, &dt); + + r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4); + if (r != X86EMUL_CONTINUE) + return r; + + rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES); + rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS); + rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS); + rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS); + rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS); + rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS); + + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); + ctxt->interruptibility = (u8)smstate->int_shadow; + + return X86EMUL_CONTINUE; +} +#endif + +int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) +{ + struct kvm_vcpu *vcpu = ctxt->vcpu; + unsigned long cr0; + union kvm_smram smram; + u64 smbase; + int ret; + + smbase = vcpu->arch.smbase; + + ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram)); + if (ret < 0) + return X86EMUL_UNHANDLEABLE; + + if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0) + static_call(kvm_x86_set_nmi_mask)(vcpu, false); + + kvm_smm_changed(vcpu, false); + + /* + * Get back to real mode, to prepare a safe state in which to load + * CR0/CR3/CR4/EFER. It's all a bit more complicated if the vCPU + * supports long mode. + */ +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { + struct kvm_segment cs_desc; + unsigned long cr4; + + /* Zero CR4.PCIDE before CR0.PG. */ + cr4 = kvm_read_cr4(vcpu); + if (cr4 & X86_CR4_PCIDE) + kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE); + + /* A 32-bit code segment is required to clear EFER.LMA. */ + memset(&cs_desc, 0, sizeof(cs_desc)); + cs_desc.type = 0xb; + cs_desc.s = cs_desc.g = cs_desc.present = 1; + kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS); + } +#endif + + /* For the 64-bit case, this will clear EFER.LMA. */ + cr0 = kvm_read_cr0(vcpu); + if (cr0 & X86_CR0_PE) + kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE)); + +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) { + unsigned long cr4, efer; + + /* Clear CR4.PAE before clearing EFER.LME. */ + cr4 = kvm_read_cr4(vcpu); + if (cr4 & X86_CR4_PAE) + kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE); + + /* And finally go back to 32-bit mode. */ + efer = 0; + kvm_set_msr(vcpu, MSR_EFER, efer); + } +#endif + + /* + * Give leave_smm() a chance to make ISA-specific changes to the vCPU + * state (e.g. enter guest mode) before loading state from the SMM + * state-save area. + */ + if (static_call(kvm_x86_leave_smm)(vcpu, &smram)) + return X86EMUL_UNHANDLEABLE; + +#ifdef CONFIG_X86_64 + if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) + return rsm_load_state_64(ctxt, &smram.smram64); + else +#endif + return rsm_load_state_32(ctxt, &smram.smram32); +} diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h new file mode 100644 index 0000000000000000000000000000000000000000..a1cf2ac5bd78d57214e0ac0416be5b1257fb1432 --- /dev/null +++ b/arch/x86/kvm/smm.h @@ -0,0 +1,168 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_KVM_SMM_H +#define ASM_KVM_SMM_H + +#include + +#ifdef CONFIG_KVM_SMM + + +/* + * 32 bit KVM's emulated SMM layout. Based on Intel P6 layout + * (https://www.sandpile.org/x86/smm.htm). + */ + +struct kvm_smm_seg_state_32 { + u32 flags; + u32 limit; + u32 base; +} __packed; + +struct kvm_smram_state_32 { + u32 reserved1[62]; + u32 smbase; + u32 smm_revision; + u16 io_inst_restart; + u16 auto_hlt_restart; + u32 io_restart_rdi; + u32 io_restart_rcx; + u32 io_restart_rsi; + u32 io_restart_rip; + u32 cr4; + + /* A20M#, CPL, shutdown and other reserved/undocumented fields */ + u16 reserved2; + u8 int_shadow; /* KVM extension */ + u8 reserved3[17]; + + struct kvm_smm_seg_state_32 ds; + struct kvm_smm_seg_state_32 fs; + struct kvm_smm_seg_state_32 gs; + struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */ + struct kvm_smm_seg_state_32 tr; + u32 reserved; + struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */ + struct kvm_smm_seg_state_32 ldtr; + struct kvm_smm_seg_state_32 es; + struct kvm_smm_seg_state_32 cs; + struct kvm_smm_seg_state_32 ss; + + u32 es_sel; + u32 cs_sel; + u32 ss_sel; + u32 ds_sel; + u32 fs_sel; + u32 gs_sel; + u32 ldtr_sel; + u32 tr_sel; + + u32 dr7; + u32 dr6; + u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */ + u32 eip; + u32 eflags; + u32 cr3; + u32 cr0; +} __packed; + + +/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */ + +struct kvm_smm_seg_state_64 { + u16 selector; + u16 attributes; + u32 limit; + u64 base; +}; + +struct kvm_smram_state_64 { + + struct kvm_smm_seg_state_64 es; + struct kvm_smm_seg_state_64 cs; + struct kvm_smm_seg_state_64 ss; + struct kvm_smm_seg_state_64 ds; + struct kvm_smm_seg_state_64 fs; + struct kvm_smm_seg_state_64 gs; + struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/ + struct kvm_smm_seg_state_64 ldtr; + struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/ + struct kvm_smm_seg_state_64 tr; + + /* I/O restart and auto halt restart are not implemented by KVM */ + u64 io_restart_rip; + u64 io_restart_rcx; + u64 io_restart_rsi; + u64 io_restart_rdi; + u32 io_restart_dword; + u32 reserved1; + u8 io_inst_restart; + u8 auto_hlt_restart; + u8 amd_nmi_mask; /* Documented in AMD BKDG as NMI mask, not used by KVM */ + u8 int_shadow; + u32 reserved2; + + u64 efer; + + /* + * Two fields below are implemented on AMD only, to store + * SVM guest vmcb address if the #SMI was received while in the guest mode. + */ + u64 svm_guest_flag; + u64 svm_guest_vmcb_gpa; + u64 svm_guest_virtual_int; /* unknown purpose, not implemented */ + + u32 reserved3[3]; + u32 smm_revison; + u32 smbase; + u32 reserved4[5]; + + /* ssp and svm_* fields below are not implemented by KVM */ + u64 ssp; + u64 svm_guest_pat; + u64 svm_host_efer; + u64 svm_host_cr4; + u64 svm_host_cr3; + u64 svm_host_cr0; + + u64 cr4; + u64 cr3; + u64 cr0; + u64 dr7; + u64 dr6; + u64 rflags; + u64 rip; + u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */ +}; + +union kvm_smram { + struct kvm_smram_state_64 smram64; + struct kvm_smram_state_32 smram32; + u8 bytes[512]; +}; + +static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) +{ + kvm_make_request(KVM_REQ_SMI, vcpu); + return 0; +} + +static inline bool is_smm(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.hflags & HF_SMM_MASK; +} + +void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm); +void enter_smm(struct kvm_vcpu *vcpu); +int emulator_leave_smm(struct x86_emulate_ctxt *ctxt); +void process_smi(struct kvm_vcpu *vcpu); +#else +static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; } +static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; } + +/* + * emulator_leave_smm is used as a function pointer, so the + * stub is defined in x86.c. + */ +#endif + +#endif diff --git a/arch/x86/kvm/svm/hyperv.c b/arch/x86/kvm/svm/hyperv.c new file mode 100644 index 0000000000000000000000000000000000000000..088f6429b24cef4de2dd1b7b97e3c657fc2d2f40 --- /dev/null +++ b/arch/x86/kvm/svm/hyperv.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * AMD SVM specific code for Hyper-V on KVM. + * + * Copyright 2022 Red Hat, Inc. and/or its affiliates. + */ +#include "hyperv.h" + +void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->control.exit_code = HV_SVM_EXITCODE_ENL; + svm->vmcb->control.exit_code_hi = 0; + svm->vmcb->control.exit_info_1 = HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH; + svm->vmcb->control.exit_info_2 = 0; + nested_svm_vmexit(svm); +} diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h index 7d6d97968fb981902ae401ffeff87b9406f34382..02f4784b5d446b2f0a86604b589c85981b13e5d1 100644 --- a/arch/x86/kvm/svm/hyperv.h +++ b/arch/x86/kvm/svm/hyperv.h @@ -9,27 +9,37 @@ #include #include "../hyperv.h" +#include "svm.h" -/* - * Hyper-V uses the software reserved 32 bytes in VMCB - * control area to expose SVM enlightenments to guests. - */ -struct hv_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; +static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); -/* - * Hyper-V uses the software reserved clean bit in VMCB - */ -#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW + if (!hv_vcpu) + return; + + hv_vcpu->nested.pa_page_gpa = hve->partition_assist_page; + hv_vcpu->nested.vm_id = hve->hv_vm_id; + hv_vcpu->nested.vp_id = hve->hv_vp_id; +} + +static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + + if (!hv_vcpu) + return false; + + if (!hve->hv_enlightenments_control.nested_flush_hypercall) + return false; + + return hv_vcpu->vp_assist_page.nested_control.features.directhypercall; +} + +void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 995bc0f907591745677aa6950d0a78620ddc1228..bc9cd7086fa972a8c9dd0600f7316278da4029ec 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -25,6 +25,7 @@ #include "trace.h" #include "mmu.h" #include "x86.h" +#include "smm.h" #include "cpuid.h" #include "lapic.h" #include "svm.h" @@ -149,8 +150,12 @@ void recalc_intercepts(struct vcpu_svm *svm) vmcb_clr_intercept(c, INTERCEPT_VINTR); } - /* We don't want to see VMMCALLs from a nested guest */ - vmcb_clr_intercept(c, INTERCEPT_VMMCALL); + /* + * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB + * flush feature is enabled. + */ + if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu)) + vmcb_clr_intercept(c, INTERCEPT_VMMCALL); for (i = 0; i < MAX_INTERCEPT; i++) c->intercepts[i] |= g->intercepts[i]; @@ -179,8 +184,7 @@ void recalc_intercepts(struct vcpu_svm *svm) */ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { - struct hv_enlightenments *hve = - (struct hv_enlightenments *)svm->nested.ctl.reserved_sw; + struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments; int i; /* @@ -194,7 +198,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) if (!svm->nested.force_msr_bitmap_recalc && kvm_hv_hypercall_enabled(&svm->vcpu) && hve->hv_enlightenments_control.msr_bitmap && - (svm->nested.ctl.clean & BIT(VMCB_HV_NESTED_ENLIGHTENMENTS))) + (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS))) goto set_msrpm_base_pa; if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) @@ -369,8 +373,8 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu, /* Hyper-V extensions (Enlightened VMCB) */ if (kvm_hv_hypercall_enabled(vcpu)) { to->clean = from->clean; - memcpy(to->reserved_sw, from->reserved_sw, - sizeof(struct hv_enlightenments)); + memcpy(&to->hv_enlightenments, &from->hv_enlightenments, + sizeof(to->hv_enlightenments)); } } @@ -473,6 +477,15 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm, static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu) { + /* + * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or + * L2's VP_ID upon request from the guest. Make sure we check for + * pending entries in the right FIFO upon L1/L2 transition as these + * requests are put by other vCPUs asynchronously. + */ + if (to_hv_vcpu(vcpu) && npt_enabled) + kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + /* * TODO: optimize unconditional TLB flush/MMU sync. A partial list of * things to fix before this can be conditional: @@ -800,6 +813,8 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, if (kvm_vcpu_apicv_active(vcpu)) kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu); + nested_svm_hv_update_vm_vp_ids(vcpu); + return 0; } @@ -822,6 +837,13 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu) return 1; } + /* This fails when VP assist page is enabled but the supplied GPA is bogus */ + ret = kvm_hv_verify_vp_assist(vcpu); + if (ret) { + kvm_inject_gp(vcpu, 0); + return ret; + } + vmcb12_gpa = svm->vmcb->save.rax; ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map); if (ret == -EINVAL) { @@ -1383,6 +1405,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) return 0; } +#ifdef CONFIG_KVM_SMM if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { if (block_nested_events) return -EBUSY; @@ -1391,6 +1414,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) nested_svm_simple_vmexit(svm, SVM_EXIT_SMI); return 0; } +#endif if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { if (block_nested_events) @@ -1417,6 +1441,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) int nested_svm_exit_special(struct vcpu_svm *svm) { u32 exit_code = svm->vmcb->control.exit_code; + struct kvm_vcpu *vcpu = &svm->vcpu; switch (exit_code) { case SVM_EXIT_INTR: @@ -1435,6 +1460,13 @@ int nested_svm_exit_special(struct vcpu_svm *svm) return NESTED_EXIT_HOST; break; } + case SVM_EXIT_VMMCALL: + /* Hyper-V L2 TLB flush hypercall is handled by L0 */ + if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) && + nested_svm_l2_tlb_flush_enabled(vcpu) && + kvm_hv_is_tlb_flush_hcall(vcpu)) + return NESTED_EXIT_HOST; + break; default: break; } @@ -1485,7 +1517,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst, dst->virt_ext = from->virt_ext; dst->pause_filter_count = from->pause_filter_count; dst->pause_filter_thresh = from->pause_filter_thresh; - /* 'clean' and 'reserved_sw' are not changed by KVM */ + /* 'clean' and 'hv_enlightenments' are not changed by KVM */ } static int svm_get_nested_state(struct kvm_vcpu *vcpu, @@ -1715,6 +1747,9 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) return false; } + if (kvm_hv_verify_vp_assist(vcpu)) + return false; + return true; } @@ -1726,4 +1761,5 @@ struct kvm_x86_nested_ops svm_nested_ops = { .get_nested_state_pages = svm_get_nested_state_pages, .get_state = svm_get_nested_state, .set_state = svm_set_nested_state, + .hv_inject_synthetic_vmexit_post_tlb_flush = svm_hv_inject_synthetic_vmexit_post_tlb_flush, }; diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 9d65cd095691be1aad51b745eb4c7d5dba992dfe..0e313fbae0556bab9a16f6246fbcab2e6e639085 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -159,7 +159,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~pmu->reserved_bits; if (data != pmc->eventsel) { pmc->eventsel = data; - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); } return 0; } @@ -212,7 +212,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu) struct kvm_pmc *pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); - pmc->counter = pmc->eventsel = 0; + pmc->counter = pmc->prev_counter = pmc->eventsel = 0; } } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index efaaef2b7ae11b280da39abf027107ed242fd1b2..86d6897f480684c272a3026e6e9efd7f83cc8464 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -465,9 +465,9 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) return; for (i = 0; i < npages; i++) { - page_virtual = kmap_atomic(pages[i]); + page_virtual = kmap_local_page(pages[i]); clflush_cache_range(page_virtual, PAGE_SIZE); - kunmap_atomic(page_virtual); + kunmap_local(page_virtual); cond_resched(); } } @@ -2648,7 +2648,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) ghcb_scratch_beg = control->ghcb_gpa + offsetof(struct ghcb, shared_buffer); ghcb_scratch_end = control->ghcb_gpa + - offsetof(struct ghcb, reserved_1); + offsetof(struct ghcb, reserved_0xff0); /* * If the scratch area begins within the GHCB, it must be diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index ce362e88a5676cf1deac281fed528a010e594291..9a194aa1a75a498d69f64bfafdca887e6c2e16bf 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -6,6 +6,7 @@ #include "mmu.h" #include "kvm_cache_regs.h" #include "x86.h" +#include "smm.h" #include "cpuid.h" #include "pmu.h" @@ -2708,8 +2709,6 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr) if (cpu_feature_enabled(X86_FEATURE_LFENCE_RDTSC)) msr->data |= MSR_AMD64_DE_CFG_LFENCE_SERIALIZE; break; - case MSR_IA32_PERF_CAPABILITIES: - return 0; default: return KVM_MSR_RET_INVALID; } @@ -3723,6 +3722,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * Unlike VMX, SVM doesn't provide a way to flush only NPT TLB entries. + * A TLB flush for the current ASID flushes both "host" and "guest" TLB + * entries, and thus is a superset of Hyper-V's fine grained flushing. + */ + kvm_hv_vcpu_purge_flush_tlb(vcpu); + /* * Flush only the current ASID even if the TLB flush was invoked via * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all @@ -3889,8 +3895,14 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu) static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) { - if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && - to_svm(vcpu)->vmcb->control.exit_info_1) + struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control; + + /* + * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM + * can't read guest memory (dereference memslots) to decode the WRMSR. + */ + if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 && + nrips && control->next_rip) return handle_fastpath_set_msr_irqoff(vcpu); return EXIT_FASTPATH_NONE; @@ -4102,6 +4114,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: return false; case MSR_IA32_SMBASE: + if (!IS_ENABLED(CONFIG_KVM_SMM)) + return false; /* SEV-ES guests do not support SMM, so report false */ if (kvm && sev_es_guest(kvm)) return false; @@ -4358,6 +4372,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu) vcpu->arch.mcg_cap &= 0x1ff; } +#ifdef CONFIG_KVM_SMM bool svm_smi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4385,7 +4400,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) return 1; } -static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) +static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map_save; @@ -4394,10 +4409,16 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) if (!is_guest_mode(vcpu)) return 0; - /* FED8h - SVM Guest */ - put_smstate(u64, smstate, 0x7ed8, 1); - /* FEE0h - SVM Guest VMCB Physical Address */ - put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); + /* + * 32-bit SMRAM format doesn't preserve EFER and SVM state. Userspace is + * responsible for ensuring nested SVM and SMIs are mutually exclusive. + */ + + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + return 1; + + smram->smram64.svm_guest_flag = 1; + smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; @@ -4419,8 +4440,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) * that, see svm_prepare_switch_to_guest()) which must be * preserved. */ - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), - &map_save) == -EINVAL) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save)) return 1; BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); @@ -4432,34 +4452,33 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map, map_save; - u64 saved_efer, vmcb12_gpa; struct vmcb *vmcb12; int ret; + const struct kvm_smram_state_64 *smram64 = &smram->smram64; + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) return 0; /* Non-zero if SMI arrived while vCPU was in guest mode. */ - if (!GET_SMSTATE(u64, smstate, 0x7ed8)) + if (!smram64->svm_guest_flag) return 0; if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) return 1; - saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); - if (!(saved_efer & EFER_SVME)) + if (!(smram64->efer & EFER_SVME)) return 1; - vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map)) return 1; ret = 1; - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL) + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save)) goto unmap_map; if (svm_allocate_nested(svm)) @@ -4481,7 +4500,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) vmcb12 = map.hva; nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); - ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); + ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false); if (ret) goto unmap_save; @@ -4507,6 +4526,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) /* We must be in SMM; RSM will cause a vmexit anyway. */ } } +#endif static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, void *insn, int insn_len) @@ -4782,10 +4802,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .pi_update_irte = avic_pi_update_irte, .setup_mce = svm_setup_mce, +#ifdef CONFIG_KVM_SMM .smi_allowed = svm_smi_allowed, .enter_smm = svm_enter_smm, .leave_smm = svm_leave_smm, .enable_smi_window = svm_enable_smi_window, +#endif .mem_enc_ioctl = sev_mem_enc_ioctl, .mem_enc_register_region = sev_mem_enc_register_region, @@ -4851,6 +4873,7 @@ static __init void svm_set_cpu_caps(void) { kvm_set_cpu_caps(); + kvm_caps.supported_perf_cap = 0; kvm_caps.supported_xss = 0; /* CPUID 0x80000001 and 0x8000000A (SVM features) */ diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 199a2ecef1cec611d8c903ae9a525589ac30ea25..4826e6cc611bf1305c55c93c52f2364bf40d4506 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -151,7 +151,10 @@ struct vmcb_ctrl_area_cached { u64 nested_cr3; u64 virt_ext; u32 clean; - u8 reserved_sw[32]; + union { + struct hv_vmcb_enlightenments hv_enlightenments; + u8 reserved_sw[32]; + }; }; struct svm_nested_state { diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c index 8cdc62c74a964e4369d0d1a4e09c60cde740095b..26a89d0da93e46ba33f2b36164c2e1f751243fa3 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.c +++ b/arch/x86/kvm/svm/svm_onhyperv.c @@ -14,9 +14,9 @@ #include "kvm_onhyperv.h" #include "svm_onhyperv.h" -int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) { - struct hv_enlightenments *hve; + struct hv_vmcb_enlightenments *hve; struct hv_partition_assist_pg **p_hv_pa_pg = &to_kvm_hv(vcpu->kvm)->hv_pa_pg; @@ -26,13 +26,13 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) if (!*p_hv_pa_pg) return -ENOMEM; - hve = (struct hv_enlightenments *)to_svm(vcpu)->vmcb->control.reserved_sw; + hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments; hve->partition_assist_page = __pa(*p_hv_pa_pg); hve->hv_vm_id = (unsigned long)vcpu->kvm; if (!hve->hv_enlightenments_control.nested_flush_hypercall) { hve->hv_enlightenments_control.nested_flush_hypercall = 1; - vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); + vmcb_mark_dirty(to_svm(vcpu)->vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); } return 0; diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index e2fc593804657303fb9251d87fd07c9605848922..45faf84476cecf5d468ea4937c3e2d147874605b 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -13,12 +13,14 @@ static struct kvm_x86_ops svm_x86_ops; -int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu); +int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu); static inline void svm_hv_init_vmcb(struct vmcb *vmcb) { - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; + + BUILD_BUG_ON(sizeof(vmcb->control.hv_enlightenments) != + sizeof(vmcb->control.reserved_sw)); if (npt_enabled && ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB) @@ -51,8 +53,8 @@ static inline void svm_hv_hardware_setup(void) vp_ap->nested_control.features.directhypercall = 1; } - svm_x86_ops.enable_direct_tlbflush = - svm_hv_enable_direct_tlbflush; + svm_x86_ops.enable_l2_tlb_flush = + svm_hv_enable_l2_tlb_flush; } } @@ -60,23 +62,20 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments( struct kvm_vcpu *vcpu) { struct vmcb *vmcb = to_svm(vcpu)->vmcb; - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; if (hve->hv_enlightenments_control.msr_bitmap) - vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); + vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); } -static inline void svm_hv_update_vp_id(struct vmcb *vmcb, - struct kvm_vcpu *vcpu) +static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu) { - struct hv_enlightenments *hve = - (struct hv_enlightenments *)vmcb->control.reserved_sw; + struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments; u32 vp_index = kvm_hv_get_vpindex(vcpu); if (hve->hv_vp_id != vp_index) { hve->hv_vp_id = vp_index; - vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS); + vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS); } } #else diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 34367dc203f217dd9f87e45831ef9ce0ecc871b3..8e8295e774f0f3aa2b08a58948535f4caff2d4ac 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include +#include #include #include #include diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index bc25589ad588676974770919234b67f9ce198c43..83843379813ee3ef8cca33d1986ef61ea6e1ff9b 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -113,12 +113,13 @@ TRACE_EVENT(kvm_hv_hypercall_done, * Tracepoint for Xen hypercall. */ TRACE_EVENT(kvm_xen_hypercall, - TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1, - unsigned long a2, unsigned long a3, unsigned long a4, - unsigned long a5), - TP_ARGS(nr, a0, a1, a2, a3, a4, a5), + TP_PROTO(u8 cpl, unsigned long nr, + unsigned long a0, unsigned long a1, unsigned long a2, + unsigned long a3, unsigned long a4, unsigned long a5), + TP_ARGS(cpl, nr, a0, a1, a2, a3, a4, a5), TP_STRUCT__entry( + __field(u8, cpl) __field(unsigned long, nr) __field(unsigned long, a0) __field(unsigned long, a1) @@ -129,6 +130,7 @@ TRACE_EVENT(kvm_xen_hypercall, ), TP_fast_assign( + __entry->cpl = cpl; __entry->nr = nr; __entry->a0 = a0; __entry->a1 = a1; @@ -138,8 +140,9 @@ TRACE_EVENT(kvm_xen_hypercall, __entry->a4 = a5; ), - TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", - __entry->nr, __entry->a0, __entry->a1, __entry->a2, + TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx", + __entry->cpl, __entry->nr, + __entry->a0, __entry->a1, __entry->a2, __entry->a3, __entry->a4, __entry->a5) ); @@ -1547,38 +1550,41 @@ TRACE_EVENT(kvm_hv_timer_state, * Tracepoint for kvm_hv_flush_tlb. */ TRACE_EVENT(kvm_hv_flush_tlb, - TP_PROTO(u64 processor_mask, u64 address_space, u64 flags), - TP_ARGS(processor_mask, address_space, flags), + TP_PROTO(u64 processor_mask, u64 address_space, u64 flags, bool guest_mode), + TP_ARGS(processor_mask, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, processor_mask) __field(u64, address_space) __field(u64, flags) + __field(bool, guest_mode) ), TP_fast_assign( __entry->processor_mask = processor_mask; __entry->address_space = address_space; __entry->flags = flags; + __entry->guest_mode = guest_mode; ), - TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx", + TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx %s", __entry->processor_mask, __entry->address_space, - __entry->flags) + __entry->flags, __entry->guest_mode ? "(L2)" : "") ); /* * Tracepoint for kvm_hv_flush_tlb_ex. */ TRACE_EVENT(kvm_hv_flush_tlb_ex, - TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags), - TP_ARGS(valid_bank_mask, format, address_space, flags), + TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags, bool guest_mode), + TP_ARGS(valid_bank_mask, format, address_space, flags, guest_mode), TP_STRUCT__entry( __field(u64, valid_bank_mask) __field(u64, format) __field(u64, address_space) __field(u64, flags) + __field(bool, guest_mode) ), TP_fast_assign( @@ -1586,12 +1592,14 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex, __entry->format = format; __entry->address_space = address_space; __entry->flags = flags; + __entry->guest_mode = guest_mode; ), TP_printk("valid_bank_mask 0x%llx format 0x%llx " - "address_space 0x%llx flags 0x%llx", + "address_space 0x%llx flags 0x%llx %s", __entry->valid_bank_mask, __entry->format, - __entry->address_space, __entry->flags) + __entry->address_space, __entry->flags, + __entry->guest_mode ? "(L2)" : "") ); /* diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 07254314f3dd5b4412b97c0d0b42aa4de52ac835..cd2ac9536c99810ba25658d7ac273ff2a63c53f3 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -395,30 +395,6 @@ static inline bool vmx_pebs_supported(void) return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept; } -static inline u64 vmx_get_perf_capabilities(void) -{ - u64 perf_cap = PMU_CAP_FW_WRITES; - struct x86_pmu_lbr lbr; - u64 host_perf_cap = 0; - - if (!enable_pmu) - return 0; - - if (boot_cpu_has(X86_FEATURE_PDCM)) - rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); - - if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr) - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; - - if (vmx_pebs_supported()) { - perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; - } - - return perf_cap; -} - static inline bool cpu_has_notify_vmexit(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/hyperv.c similarity index 95% rename from arch/x86/kvm/vmx/evmcs.c rename to arch/x86/kvm/vmx/hyperv.c index d8b23c96d627234e67f50d3bad7eceef444291a5..ae03d1fe03552174ff6673edffd8980f123d3cda 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/hyperv.c @@ -3,9 +3,9 @@ #include #include -#include "../hyperv.h" #include "../cpuid.h" -#include "evmcs.h" +#include "hyperv.h" +#include "nested.h" #include "vmcs.h" #include "vmx.h" #include "trace.h" @@ -322,24 +322,17 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { }; const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1); -bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) +u64 nested_get_evmptr(struct kvm_vcpu *vcpu) { - struct hv_vp_assist_page assist_page; - - *evmcs_gpa = -1ull; - - if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page))) - return false; - - if (unlikely(!assist_page.enlighten_vmentry)) - return false; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); - if (unlikely(!evmptr_is_valid(assist_page.current_nested_vmcs))) - return false; + if (unlikely(kvm_hv_get_assist_page(vcpu))) + return EVMPTR_INVALID; - *evmcs_gpa = assist_page.current_nested_vmcs; + if (unlikely(!hv_vcpu->vp_assist_page.enlighten_vmentry)) + return EVMPTR_INVALID; - return true; + return hv_vcpu->vp_assist_page.current_nested_vmcs; } uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) @@ -507,3 +500,23 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu, return 0; } + +bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; + + if (!hv_vcpu || !evmcs) + return false; + + if (!evmcs->hv_enlightenments_control.nested_flush_hypercall) + return false; + + return hv_vcpu->vp_assist_page.nested_control.features.directhypercall; +} + +void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu) +{ + nested_vmx_vmexit(vcpu, HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH, 0, 0); +} diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/hyperv.h similarity index 95% rename from arch/x86/kvm/vmx/evmcs.h rename to arch/x86/kvm/vmx/hyperv.h index 6f746ef3c0386ce277ef520d9a1746d0728a4ad8..571e7929d14e7b83058228291158f0e96933bf68 100644 --- a/arch/x86/kvm/vmx/evmcs.h +++ b/arch/x86/kvm/vmx/hyperv.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __KVM_X86_VMX_EVMCS_H -#define __KVM_X86_VMX_EVMCS_H +#ifndef __KVM_X86_VMX_HYPERV_H +#define __KVM_X86_VMX_HYPERV_H #include @@ -8,6 +8,8 @@ #include #include +#include "../hyperv.h" + #include "capabilities.h" #include "vmcs.h" #include "vmcs12.h" @@ -235,11 +237,13 @@ enum nested_evmptrld_status { EVMPTRLD_ERROR, }; -bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa); +u64 nested_get_evmptr(struct kvm_vcpu *vcpu); uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu); int nested_enable_evmcs(struct kvm_vcpu *vcpu, uint16_t *vmcs_version); void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata); int nested_evmcs_check_controls(struct vmcs12 *vmcs12); +bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu); +void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu); -#endif /* __KVM_X86_VMX_EVMCS_H */ +#endif /* __KVM_X86_VMX_HYPERV_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 5b0d4859e4b783b9cd60212e1d8d95e4e51c1bf1..b6f4411b613e9105d8cca1a989ee8f3a4a2a7197 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -7,7 +7,6 @@ #include #include "cpuid.h" -#include "evmcs.h" #include "hyperv.h" #include "mmu.h" #include "nested.h" @@ -16,6 +15,7 @@ #include "trace.h" #include "vmx.h" #include "x86.h" +#include "smm.h" static bool __read_mostly enable_shadow_vmcs = 1; module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO); @@ -225,6 +225,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx) static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) { + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) { @@ -233,6 +234,12 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu) } vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; + + if (hv_vcpu) { + hv_vcpu->nested.pa_page_gpa = INVALID_GPA; + hv_vcpu->nested.vm_id = 0; + hv_vcpu->nested.vp_id = 0; + } } static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx, @@ -1125,6 +1132,15 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or + * L2's VP_ID upon request from the guest. Make sure we check for + * pending entries in the right FIFO upon L1/L2 transition as these + * requests are put by other vCPUs asynchronously. + */ + if (to_hv_vcpu(vcpu) && enable_ept) + kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu); + /* * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a @@ -1557,11 +1573,19 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields { struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; + struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu); /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ vmcs12->tpr_threshold = evmcs->tpr_threshold; vmcs12->guest_rip = evmcs->guest_rip; + if (unlikely(!(hv_clean_fields & + HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL))) { + hv_vcpu->nested.pa_page_gpa = evmcs->partition_assist_page; + hv_vcpu->nested.vm_id = evmcs->hv_vm_id; + hv_vcpu->nested.vp_id = evmcs->hv_vp_id; + } + if (unlikely(!(hv_clean_fields & HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) { vmcs12->guest_rsp = evmcs->guest_rsp; @@ -1977,7 +2001,8 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld( if (likely(!guest_cpuid_has_evmcs(vcpu))) return EVMPTRLD_DISABLED; - if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) { + evmcs_gpa = nested_get_evmptr(vcpu); + if (!evmptr_is_valid(evmcs_gpa)) { nested_release_evmcs(vcpu); return EVMPTRLD_DISABLED; } @@ -2563,12 +2588,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, nested_ept_init_mmu_context(vcpu); /* - * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those - * bits which we consider mandatory enabled. - * The CR0_READ_SHADOW is what L2 should have expected to read given - * the specifications by L1; It's not enough to take - * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we - * have more bits than L1 expected. + * Override the CR0/CR4 read shadows after setting the effective guest + * CR0/CR4. The common helpers also set the shadows, but they don't + * account for vmcs12's cr0/4_guest_host_mask. */ vmx_set_cr0(vcpu, vmcs12->guest_cr0); vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); @@ -3251,6 +3273,12 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu) static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu) { + /* + * Note: nested_get_evmcs_page() also updates 'vp_assist_page' copy + * in 'struct kvm_vcpu_hv' in case eVMCS is in use, this is mandatory + * to make nested_evmcs_l2_tlb_flush_enabled() work correctly post + * migration. + */ if (!nested_get_evmcs_page(vcpu)) { pr_debug_ratelimited("%s: enlightened vmptrld failed\n", __func__); @@ -4767,6 +4795,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, vmx_switch_vmcs(vcpu, &vmx->vmcs01); + /* + * If IBRS is advertised to the vCPU, KVM must flush the indirect + * branch predictors when transitioning from L2 to L1, as L1 expects + * hardware (KVM in this case) to provide separate predictor modes. + * Bare metal isolates VMX root (host) from VMX non-root (guest), but + * doesn't isolate different VMCSs, i.e. in this case, doesn't provide + * separate modes for L2 vs L1. + */ + if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) + indirect_branch_prediction_barrier(); + /* Update any VMCS fields that might have changed while L2 ran */ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr); vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr); @@ -5100,24 +5139,35 @@ static int handle_vmxon(struct kvm_vcpu *vcpu) | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX; /* - * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks - * that have higher priority than VM-Exit (see Intel SDM's pseudocode - * for VMXON), as KVM must load valid CR0/CR4 values into hardware while - * running the guest, i.e. KVM needs to check the _guest_ values. + * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter + * the guest and so cannot rely on hardware to perform the check, + * which has higher priority than VM-Exit (see Intel SDM's pseudocode + * for VMXON). * - * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and - * !COMPATIBILITY modes. KVM may run the guest in VM86 to emulate Real - * Mode, but KVM will never take the guest out of those modes. + * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86 + * and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't + * force any of the relevant guest state. For a restricted guest, KVM + * does force CR0.PE=1, but only to also force VM86 in order to emulate + * Real Mode, and so there's no need to check CR0.PE manually. */ - if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || - !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) { kvm_queue_exception(vcpu, UD_VECTOR); return 1; } /* - * CPL=0 and all other checks that are lower priority than VM-Exit must - * be checked manually. + * The CPL is checked for "not in VMX operation" and for "in VMX root", + * and has higher priority than the VM-Fail due to being post-VMXON, + * i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root, + * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits + * from L2 to L1, i.e. there's no need to check for the vCPU being in + * VMX non-root. + * + * Forwarding the VM-Exit unconditionally, i.e. without performing the + * #UD checks (see above), is functionally ok because KVM doesn't allow + * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's + * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are + * missed by hardware due to shadowing CR0 and/or CR4. */ if (vmx_get_cpl(vcpu)) { kvm_inject_gp(vcpu, 0); @@ -5127,6 +5177,17 @@ static int handle_vmxon(struct kvm_vcpu *vcpu) if (vmx->nested.vmxon) return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); + /* + * Invalid CR0/CR4 generates #GP. These checks are performed if and + * only if the vCPU isn't already in VMX operation, i.e. effectively + * have lower priority than the VM-Fail above. + */ + if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) || + !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) { + kvm_inject_gp(vcpu, 0); + return 1; + } + if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES) != VMXON_NEEDED_FEATURES) { kvm_inject_gp(vcpu, 0); @@ -5206,7 +5267,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); u32 zero = 0; gpa_t vmptr; - u64 evmcs_gpa; int r; if (!nested_vmx_check_permission(vcpu)) @@ -5232,7 +5292,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) * vmx->nested.hv_evmcs but this shouldn't be a problem. */ if (likely(!guest_cpuid_has_evmcs(vcpu) || - !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) { + !evmptr_is_valid(nested_get_evmptr(vcpu)))) { if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vcpu); @@ -6129,6 +6189,11 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, * Handle L2's bus locks in L0 directly. */ return true; + case EXIT_REASON_VMCALL: + /* Hyper-V L2 TLB flush hypercall is handled by L0 */ + return guest_hv_cpuid_has_l2_tlb_flush(vcpu) && + nested_evmcs_l2_tlb_flush_enabled(vcpu) && + kvm_hv_is_tlb_flush_hcall(vcpu); default: break; } @@ -6980,4 +7045,5 @@ struct kvm_x86_nested_ops vmx_nested_ops = { .write_log_dirty = nested_vmx_write_pml_buffer, .enable_evmcs = nested_enable_evmcs, .get_evmcs_version = nested_get_evmcs_version, + .hv_inject_synthetic_vmexit_post_tlb_flush = vmx_hv_inject_synthetic_vmexit_post_tlb_flush, }; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index 6312c9541c3cd3bc324fdf630099fed1a1134be9..96952263b029bfed932ec13190c545843b6acff0 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -79,9 +79,10 @@ static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) } /* - * Return the cr0 value that a nested guest would read. This is a combination - * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by - * its hypervisor (cr0_read_shadow). + * Return the cr0/4 value that a nested guest would read. This is a combination + * of L1's "real" cr0 used to run the guest (guest_cr0), and the bits shadowed + * by the L1 hypervisor (cr0_read_shadow). KVM must emulate CPU behavior as + * the value+mask loaded into vmcs02 may not match the vmcs12 fields. */ static inline unsigned long nested_read_cr0(struct vmcs12 *fields) { diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 10b33da9bd05852060b405aeb09498225f671a1f..e5cec07ca8d9574d86793806675f3a26ab23767c 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -52,7 +52,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i); __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); } } @@ -76,7 +76,7 @@ static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) { pmc = intel_pmc_idx_to_pmc(pmu, bit); if (pmc) - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); } } @@ -477,7 +477,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) reserved_bits ^= HSW_IN_TX_CHECKPOINTED; if (!(data & reserved_bits)) { pmc->eventsel = data; - reprogram_counter(pmc); + kvm_pmu_request_counter_reprogam(pmc); return 0; } } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) @@ -631,7 +631,6 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->fixed_counters[i].current_config = 0; } - vcpu->arch.perf_capabilities = vmx_get_perf_capabilities(); lbr_desc->records.nr = 0; lbr_desc->event = NULL; lbr_desc->msr_passthrough = false; @@ -647,14 +646,14 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) pmc = &pmu->gp_counters[i]; pmc_stop_counter(pmc); - pmc->counter = pmc->eventsel = 0; + pmc->counter = pmc->prev_counter = pmc->eventsel = 0; } for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmc = &pmu->fixed_counters[i]; pmc_stop_counter(pmc); - pmc->counter = 0; + pmc->counter = pmc->prev_counter = 0; } pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0; diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c index 8f95c7c0143359cc617641eddbf422018d77cf63..b12da2a6dec95644941c67200c242fb2982b0742 100644 --- a/arch/x86/kvm/vmx/sgx.c +++ b/arch/x86/kvm/vmx/sgx.c @@ -182,8 +182,10 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, /* Enforce CPUID restriction on max enclave size. */ max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 : sgx_12_0->edx; - if (size >= BIT_ULL(max_size_log2)) + if (size >= BIT_ULL(max_size_log2)) { kvm_inject_gp(vcpu, 0); + return 1; + } /* * sgx_virt_ecreate() returns: diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index 746129ddd5ae02b0148130122728fd490a87ff91..01936013428b5c0a7e9e5cbd0fafe1d38770e4d0 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -208,9 +208,8 @@ struct __packed vmcs12 { /* * For save/restore compatibility, the vmcs12 field offsets must not change. */ -#define CHECK_OFFSET(field, loc) \ - BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \ - "Offset of " #field " in struct vmcs12 has changed.") +#define CHECK_OFFSET(field, loc) \ + ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc) static inline void vmx_check_vmcs12_offsets(void) { diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 0b5db4de4d09e568cc01b6005748f97104d02ec8..766c6b3ef5ed95f5d4546cd4254aec8d275de2a7 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -269,6 +269,7 @@ SYM_FUNC_END(__vmx_vcpu_run) .section .text, "ax" +#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT /** * vmread_error_trampoline - Trampoline from inline asm to vmread_error() * @field: VMCS field encoding that failed @@ -317,6 +318,7 @@ SYM_FUNC_START(vmread_error_trampoline) RET SYM_FUNC_END(vmread_error_trampoline) +#endif SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff) /* diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 63247c57c72cc6ef206ef29f4b3d0fb117e9d93f..fe5615fd8295c4f615f5fe44785f24617b042a0d 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -51,7 +51,6 @@ #include "capabilities.h" #include "cpuid.h" -#include "evmcs.h" #include "hyperv.h" #include "kvm_onhyperv.h" #include "irq.h" @@ -66,6 +65,7 @@ #include "vmcs12.h" #include "vmx.h" #include "x86.h" +#include "smm.h" MODULE_AUTHOR("Qumranet"); MODULE_LICENSE("GPL"); @@ -526,7 +526,7 @@ static unsigned long host_idt_base; static bool __read_mostly enlightened_vmcs = true; module_param(enlightened_vmcs, bool, 0444); -static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu) +static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu) { struct hv_enlightened_vmcs *evmcs; struct hv_partition_assist_pg **p_hv_pa_pg = @@ -858,7 +858,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx) * to change it directly without causing a vmexit. In that case read * it after vmexit and store it in vmx->spec_ctrl. */ - if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))) + if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)) flags |= VMX_RUN_SAVE_SPEC_CTRL; return flags; @@ -1348,8 +1348,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, /* * No indirect branch prediction barrier needed when switching - * the active VMCS within a guest, e.g. on nested VM-Enter. - * The L1 VMM can protect itself with retpolines, IBPB or IBRS. + * the active VMCS within a vCPU, unless IBRS is advertised to + * the vCPU. To minimize the number of IBPBs executed, KVM + * performs IBPB on nested VM-Exit (a single nested transition + * may switch the active VMCS multiple times). */ if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) indirect_branch_prediction_barrier(); @@ -1834,12 +1836,42 @@ bool nested_vmx_allowed(struct kvm_vcpu *vcpu) return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX); } -static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu, - uint64_t val) +/* + * Userspace is allowed to set any supported IA32_FEATURE_CONTROL regardless of + * guest CPUID. Note, KVM allows userspace to set "VMX in SMX" to maintain + * backwards compatibility even though KVM doesn't support emulating SMX. And + * because userspace set "VMX in SMX", the guest must also be allowed to set it, + * e.g. if the MSR is left unlocked and the guest does a RMW operation. + */ +#define KVM_SUPPORTED_FEATURE_CONTROL (FEAT_CTL_LOCKED | \ + FEAT_CTL_VMX_ENABLED_INSIDE_SMX | \ + FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | \ + FEAT_CTL_SGX_LC_ENABLED | \ + FEAT_CTL_SGX_ENABLED | \ + FEAT_CTL_LMCE_ENABLED) + +static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, + struct msr_data *msr) { - uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits; + uint64_t valid_bits; + + /* + * Ensure KVM_SUPPORTED_FEATURE_CONTROL is updated when new bits are + * exposed to the guest. + */ + WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits & + ~KVM_SUPPORTED_FEATURE_CONTROL); + + if (!msr->host_initiated && + (vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED)) + return false; + + if (msr->host_initiated) + valid_bits = KVM_SUPPORTED_FEATURE_CONTROL; + else + valid_bits = vmx->msr_ia32_feature_control_valid_bits; - return !(val & ~valid_bits); + return !(msr->data & ~valid_bits); } static int vmx_get_msr_feature(struct kvm_msr_entry *msr) @@ -1849,9 +1881,6 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) if (!nested) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); - case MSR_IA32_PERF_CAPABILITIES: - msr->data = vmx_get_perf_capabilities(); - return 0; default: return KVM_MSR_RET_INVALID; } @@ -2029,7 +2058,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))) debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT; - if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) && + if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) && (host_initiated || intel_pmu_lbr_is_enabled(vcpu))) debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI; @@ -2241,10 +2270,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vcpu->arch.mcg_ext_ctl = data; break; case MSR_IA32_FEAT_CTL: - if (!vmx_feature_control_msr_valid(vcpu, data) || - (to_vmx(vcpu)->msr_ia32_feature_control & - FEAT_CTL_LOCKED && !msr_info->host_initiated)) + if (!is_vmx_feature_control_msr_valid(vmx, msr_info)) return 1; + vmx->msr_ia32_feature_control = data; if (msr_info->host_initiated && data == 0) vmx_leave_nested(vcpu); @@ -2342,14 +2370,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; if (data & PMU_CAP_LBR_FMT) { if ((data & PMU_CAP_LBR_FMT) != - (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)) + (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT)) return 1; if (!cpuid_model_is_consistent(vcpu)) return 1; } if (data & PERF_CAP_PEBS_FORMAT) { if ((data & PERF_CAP_PEBS_MASK) != - (vmx_get_perf_capabilities() & PERF_CAP_PEBS_MASK)) + (kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK)) return 1; if (!guest_cpuid_has(vcpu, X86_FEATURE_DS)) return 1; @@ -6844,6 +6872,8 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_SMBASE: + if (!IS_ENABLED(CONFIG_KVM_SMM)) + return false; /* * We cannot do SMM unless we can run the guest in big * real mode. @@ -7669,6 +7699,31 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_update_exception_bitmap(vcpu); } +static u64 vmx_get_perf_capabilities(void) +{ + u64 perf_cap = PMU_CAP_FW_WRITES; + struct x86_pmu_lbr lbr; + u64 host_perf_cap = 0; + + if (!enable_pmu) + return 0; + + if (boot_cpu_has(X86_FEATURE_PDCM)) + rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); + + x86_perf_get_lbr(&lbr); + if (lbr.nr) + perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + + if (vmx_pebs_supported()) { + perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; + if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) + perf_cap &= ~PERF_CAP_PEBS_BASELINE; + } + + return perf_cap; +} + static __init void vmx_set_cpu_caps(void) { kvm_set_cpu_caps(); @@ -7691,6 +7746,7 @@ static __init void vmx_set_cpu_caps(void) if (!enable_pmu) kvm_cpu_cap_clear(X86_FEATURE_PDCM); + kvm_caps.supported_perf_cap = vmx_get_perf_capabilities(); if (!enable_sgx) { kvm_cpu_cap_clear(X86_FEATURE_SGX); @@ -7906,6 +7962,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEAT_CTL_LMCE_ENABLED; } +#ifdef CONFIG_KVM_SMM static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { /* we need a nested vmexit to enter SMM, postpone if run is pending */ @@ -7914,7 +7971,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !is_smm(vcpu); } -static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate) +static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -7935,7 +7992,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) { struct vcpu_vmx *vmx = to_vmx(vcpu); int ret; @@ -7960,6 +8017,7 @@ static void vmx_enable_smi_window(struct kvm_vcpu *vcpu) { /* RSM will cause a vmexit anyway. */ } +#endif static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) { @@ -8127,10 +8185,12 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .setup_mce = vmx_setup_mce, +#ifdef CONFIG_KVM_SMM .smi_allowed = vmx_smi_allowed, .enter_smm = vmx_enter_smm, .leave_smm = vmx_leave_smm, .enable_smi_window = vmx_enable_smi_window, +#endif .can_emulate_instruction = vmx_can_emulate_instruction, .apic_init_signal_blocked = vmx_apic_init_signal_blocked, @@ -8490,8 +8550,8 @@ static int __init vmx_init(void) } if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH) - vmx_x86_ops.enable_direct_tlbflush - = hv_enable_direct_tlbflush; + vmx_x86_ops.enable_l2_tlb_flush + = hv_enable_l2_tlb_flush; } else { enlightened_vmcs = false; diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h index ec268df83ed67988f76a007ad65e9d77aa5d7a3f..842dc898c972876067bc8de27f5eef3cf506a7c9 100644 --- a/arch/x86/kvm/vmx/vmx_ops.h +++ b/arch/x86/kvm/vmx/vmx_ops.h @@ -6,19 +6,33 @@ #include -#include "evmcs.h" +#include "hyperv.h" #include "vmcs.h" #include "../x86.h" void vmread_error(unsigned long field, bool fault); -__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field, - bool fault); void vmwrite_error(unsigned long field, unsigned long value); void vmclear_error(struct vmcs *vmcs, u64 phys_addr); void vmptrld_error(struct vmcs *vmcs, u64 phys_addr); void invvpid_error(unsigned long ext, u16 vpid, gva_t gva); void invept_error(unsigned long ext, u64 eptp, gpa_t gpa); +#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT +/* + * The VMREAD error trampoline _always_ uses the stack to pass parameters, even + * for 64-bit targets. Preserving all registers allows the VMREAD inline asm + * blob to avoid clobbering GPRs, which in turn allows the compiler to better + * optimize sequences of VMREADs. + * + * Declare the trampoline as an opaque label as it's not safe to call from C + * code; there is no way to tell the compiler to pass params on the stack for + * 64-bit targets. + * + * void vmread_error_trampoline(unsigned long field, bool fault); + */ +extern unsigned long vmread_error_trampoline; +#endif + static __always_inline void vmcs_check16(unsigned long field) { BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 69227f77b201d7d9d89a77bdefc2d44f338e3ba8..312aea1854ae6b8c8b4a64884ffaf9b9605c6e29 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -30,6 +30,7 @@ #include "hyperv.h" #include "lapic.h" #include "xen.h" +#include "smm.h" #include #include @@ -119,8 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); -static void process_smi(struct kvm_vcpu *vcpu); -static void enter_smm(struct kvm_vcpu *vcpu); static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags); static void store_regs(struct kvm_vcpu *vcpu); static int sync_regs(struct kvm_vcpu *vcpu); @@ -464,7 +463,6 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu) { return vcpu->arch.apic_base; } -EXPORT_SYMBOL_GPL(kvm_get_apic_base); enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu) { @@ -492,7 +490,6 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) kvm_recalculate_apic_map(vcpu->kvm); return 0; } -EXPORT_SYMBOL_GPL(kvm_set_apic_base); /* * Handle a fault on a hardware virtualization (VMX or SVM) instruction. @@ -783,7 +780,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code, fault->address); } -EXPORT_SYMBOL_GPL(kvm_inject_page_fault); void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) @@ -812,7 +808,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu) atomic_inc(&vcpu->arch.nmi_queued); kvm_make_request(KVM_REQ_NMI, vcpu); } -EXPORT_SYMBOL_GPL(kvm_inject_nmi); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code) { @@ -837,7 +832,6 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) kvm_queue_exception_e(vcpu, GP_VECTOR, 0); return false; } -EXPORT_SYMBOL_GPL(kvm_require_cpl); bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr) { @@ -1654,6 +1648,9 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr) case MSR_IA32_ARCH_CAPABILITIES: msr->data = kvm_get_arch_capabilities(); break; + case MSR_IA32_PERF_CAPABILITIES: + msr->data = kvm_caps.supported_perf_cap; + break; case MSR_IA32_UCODE_REV: rdmsrl_safe(msr->index, &msr->data); break; @@ -2067,7 +2064,6 @@ int kvm_emulate_as_nop(struct kvm_vcpu *vcpu) { return kvm_skip_emulated_instruction(vcpu); } -EXPORT_SYMBOL_GPL(kvm_emulate_as_nop); int kvm_emulate_invd(struct kvm_vcpu *vcpu) { @@ -2315,13 +2311,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time, kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu); /* we verify if the enable bit is set... */ - if (system_time & 1) { - kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu, - KVM_HOST_USES_PFN, system_time & ~1ULL, + if (system_time & 1) + kvm_gpc_activate(&vcpu->arch.pv_time, system_time & ~1ULL, sizeof(struct pvclock_vcpu_time_info)); - } else { - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time); - } + else + kvm_gpc_deactivate(&vcpu->arch.pv_time); return; } @@ -2513,7 +2507,6 @@ u64 kvm_scale_tsc(u64 tsc, u64 ratio) return _tsc; } -EXPORT_SYMBOL_GPL(kvm_scale_tsc); static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) { @@ -2972,6 +2965,22 @@ static void kvm_update_masterclock(struct kvm *kvm) kvm_end_pvclock_update(kvm); } +/* + * Use the kernel's tsc_khz directly if the TSC is constant, otherwise use KVM's + * per-CPU value (which may be zero if a CPU is going offline). Note, tsc_khz + * can change during boot even if the TSC is constant, as it's possible for KVM + * to be loaded before TSC calibration completes. Ideally, KVM would get a + * notification when calibration completes, but practically speaking calibration + * will complete before userspace is alive enough to create VMs. + */ +static unsigned long get_cpu_tsc_khz(void) +{ + if (static_cpu_has(X86_FEATURE_CONSTANT_TSC)) + return tsc_khz; + else + return __this_cpu_read(cpu_tsc_khz); +} + /* Called within read_seqcount_begin/retry for kvm->pvclock_sc. */ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) { @@ -2982,7 +2991,8 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) get_cpu(); data->flags = 0; - if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) { + if (ka->use_master_clock && + (static_cpu_has(X86_FEATURE_CONSTANT_TSC) || __this_cpu_read(cpu_tsc_khz))) { #ifdef CONFIG_X86_64 struct timespec64 ts; @@ -2996,7 +3006,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data) data->flags |= KVM_CLOCK_TSC_STABLE; hv_clock.tsc_timestamp = ka->master_cycle_now; hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset; - kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL, + kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL, &hv_clock.tsc_shift, &hv_clock.tsc_to_system_mul); data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc); @@ -3035,12 +3045,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v, unsigned long flags; read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - offset + sizeof(*guest_hv_clock))) { + while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - offset + sizeof(*guest_hv_clock))) + if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock))) return; read_lock_irqsave(&gpc->lock, flags); @@ -3106,7 +3114,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) /* Keep irq disabled to prevent changes to the clock */ local_irq_save(flags); - tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz); + tgt_tsc_khz = get_cpu_tsc_khz(); if (unlikely(tgt_tsc_khz == 0)) { local_irq_restore(flags); kvm_make_request(KVM_REQ_CLOCK_UPDATE, v); @@ -3389,7 +3397,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data) static void kvmclock_reset(struct kvm_vcpu *vcpu) { - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time); + kvm_gpc_deactivate(&vcpu->arch.pv_time); vcpu->arch.time = 0; } @@ -3397,6 +3405,9 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; static_call(kvm_x86_flush_tlb_all)(vcpu); + + /* Flushing all ASIDs flushes the current ASID... */ + kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) @@ -3415,6 +3426,12 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) } static_call(kvm_x86_flush_tlb_guest)(vcpu); + + /* + * Flushing all "guest" TLB is always a superset of Hyper-V's fine + * grained flushing. + */ + kvm_hv_vcpu_purge_flush_tlb(vcpu); } @@ -3566,20 +3583,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.arch_capabilities = data; break; - case MSR_IA32_PERF_CAPABILITIES: { - struct kvm_msr_entry msr_ent = {.index = msr, .data = 0}; - + case MSR_IA32_PERF_CAPABILITIES: if (!msr_info->host_initiated) return 1; - if (kvm_get_msr_feature(&msr_ent)) - return 1; - if (data & ~msr_ent.data) + if (data & ~kvm_caps.supported_perf_cap) return 1; vcpu->arch.perf_capabilities = data; kvm_pmu_refresh(vcpu); return 0; - } case MSR_EFER: return set_efer(vcpu, msr_info); case MSR_K7_HWCR: @@ -3651,7 +3663,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; } case MSR_IA32_SMBASE: - if (!msr_info->host_initiated) + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated) return 1; vcpu->arch.smbase = data; break; @@ -4067,7 +4079,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.ia32_misc_enable_msr; break; case MSR_IA32_SMBASE: - if (!msr_info->host_initiated) + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated) return 1; msr_info->data = vcpu->arch.smbase; break; @@ -4425,7 +4437,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL | KVM_XEN_HVM_CONFIG_EVTCHN_SEND; if (sched_info_on()) - r |= KVM_XEN_HVM_CONFIG_RUNSTATE; + r |= KVM_XEN_HVM_CONFIG_RUNSTATE | + KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG; break; #endif case KVM_CAP_SYNC_REGS: @@ -4441,6 +4454,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r |= KVM_X86_DISABLE_EXITS_MWAIT; break; case KVM_CAP_X86_SMM: + if (!IS_ENABLED(CONFIG_KVM_SMM)) + break; + /* SMBASE is usually relocated above 1M on modern chipsets, * and SMM handlers might indeed rely on 4G segment limits, * so do not report SMM to be available if real mode is @@ -4481,7 +4497,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0; break; case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: - r = kvm_x86_ops.enable_direct_tlbflush != NULL; + r = kvm_x86_ops.enable_l2_tlb_flush != NULL; break; case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: r = kvm_x86_ops.nested_ops->enable_evmcs != NULL; @@ -4897,13 +4913,6 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu) return 0; } -static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu) -{ - kvm_make_request(KVM_REQ_SMI, vcpu); - - return 0; -} - static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu, struct kvm_tpr_access_ctl *tac) { @@ -5039,8 +5048,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, process_nmi(vcpu); +#ifdef CONFIG_KVM_SMM if (kvm_check_request(KVM_REQ_SMI, vcpu)) process_smi(vcpu); +#endif /* * KVM's ABI only allows for one exception to be migrated. Luckily, @@ -5068,16 +5079,15 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, ex->pending && ex->has_payload) kvm_deliver_exception_payload(vcpu, ex); + memset(events, 0, sizeof(*events)); + /* * The API doesn't provide the instruction length for software * exceptions, so don't report them. As long as the guest RIP * isn't advanced, we should expect to encounter the exception * again. */ - if (kvm_exception_is_soft(ex->vector)) { - events->exception.injected = 0; - events->exception.pending = 0; - } else { + if (!kvm_exception_is_soft(ex->vector)) { events->exception.injected = ex->injected; events->exception.pending = ex->pending; /* @@ -5097,20 +5107,20 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->interrupt.injected = vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft; events->interrupt.nr = vcpu->arch.interrupt.nr; - events->interrupt.soft = 0; events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); events->nmi.injected = vcpu->arch.nmi_injected; events->nmi.pending = vcpu->arch.nmi_pending != 0; events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu); - events->nmi.pad = 0; - events->sipi_vector = 0; /* never valid when reporting to user space */ + /* events->sipi_vector is never valid when reporting to user space */ +#ifdef CONFIG_KVM_SMM events->smi.smm = is_smm(vcpu); events->smi.pending = vcpu->arch.smi_pending; events->smi.smm_inside_nmi = !!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK); +#endif events->smi.latched_init = kvm_lapic_latched_init(vcpu); events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING @@ -5122,12 +5132,8 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu); events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; } - - memset(&events->reserved, 0, sizeof(events->reserved)); } -static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm); - static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, struct kvm_vcpu_events *events) { @@ -5200,6 +5206,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.apic->sipi_vector = events->sipi_vector; if (events->flags & KVM_VCPUEVENT_VALID_SMM) { +#ifdef CONFIG_KVM_SMM if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { kvm_leave_nested(vcpu); kvm_smm_changed(vcpu, events->smi.smm); @@ -5214,6 +5221,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK; } +#else + if (events->smi.smm || events->smi.pending || + events->smi.smm_inside_nmi) + return -EINVAL; +#endif + if (lapic_in_kernel(vcpu)) { if (events->smi.latched_init) set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); @@ -5497,10 +5510,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, } return r; case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: - if (!kvm_x86_ops.enable_direct_tlbflush) + if (!kvm_x86_ops.enable_l2_tlb_flush) return -ENOTTY; - return static_call(kvm_x86_enable_direct_tlbflush)(vcpu); + return static_call(kvm_x86_enable_l2_tlb_flush)(vcpu); case KVM_CAP_HYPERV_ENFORCE_CPUID: return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]); @@ -5580,7 +5593,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; } case KVM_SMI: { - r = kvm_vcpu_ioctl_smi(vcpu); + r = kvm_inject_smi(vcpu); break; } case KVM_SET_CPUID: { @@ -6239,9 +6252,7 @@ split_irqchip_unlock: break; case KVM_CAP_X86_USER_SPACE_MSR: r = -EINVAL; - if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL | - KVM_MSR_EXIT_REASON_UNKNOWN | - KVM_MSR_EXIT_REASON_FILTER)) + if (cap->args[0] & ~KVM_MSR_EXIT_REASON_VALID_MASK) break; kvm->arch.user_space_msr_mask = cap->args[0]; r = 0; @@ -6418,7 +6429,7 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, if (!user_range->nmsrs) return 0; - if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) + if (user_range->flags & ~KVM_MSR_FILTER_RANGE_VALID_MASK) return -EINVAL; if (!user_range->flags) @@ -6452,7 +6463,7 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, int r = 0; u32 i; - if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY) + if (filter->flags & ~KVM_MSR_FILTER_VALID_MASK) return -EINVAL; for (i = 0; i < ARRAY_SIZE(filter->ranges); i++) @@ -7125,8 +7136,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) return handled; } -static void kvm_set_segment(struct kvm_vcpu *vcpu, - struct kvm_segment *var, int seg) +void kvm_set_segment(struct kvm_vcpu *vcpu, + struct kvm_segment *var, int seg) { static_call(kvm_x86_set_segment)(vcpu, var, seg); } @@ -7162,16 +7173,6 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, } EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read); - gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, - struct x86_exception *exception) -{ - struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - - u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0; - access |= PFERR_FETCH_MASK; - return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception); -} - gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, struct x86_exception *exception) { @@ -7284,15 +7285,6 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt, return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception); } -static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt, - unsigned long addr, void *val, unsigned int bytes) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes); - - return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE; -} - static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, struct kvm_vcpu *vcpu, u64 access, struct x86_exception *exception) @@ -8084,26 +8076,6 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt, return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata); } -static int emulator_set_msr(struct x86_emulate_ctxt *ctxt, - u32 msr_index, u64 data) -{ - return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data); -} - -static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - - return vcpu->arch.smbase; -} - -static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase) -{ - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - - vcpu->arch.smbase = smbase; -} - static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt, u32 pmc) { @@ -8178,18 +8150,13 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) return emul_to_vcpu(ctxt)->arch.hflags; } -static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt) +#ifndef CONFIG_KVM_SMM +static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) { - struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); - - kvm_smm_changed(vcpu, false); -} - -static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt, - const char *smstate) -{ - return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate); + WARN_ON_ONCE(1); + return X86EMUL_UNHANDLEABLE; } +#endif static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt) { @@ -8215,7 +8182,6 @@ static const struct x86_emulate_ops emulate_ops = { .write_gpr = emulator_write_gpr, .read_std = emulator_read_std, .write_std = emulator_write_std, - .read_phys = kvm_read_guest_phys_system, .fetch = kvm_fetch_guest_virt, .read_emulated = emulator_read_emulated, .write_emulated = emulator_write_emulated, @@ -8235,11 +8201,8 @@ static const struct x86_emulate_ops emulate_ops = { .cpl = emulator_get_cpl, .get_dr = emulator_get_dr, .set_dr = emulator_set_dr, - .get_smbase = emulator_get_smbase, - .set_smbase = emulator_set_smbase, .set_msr_with_filter = emulator_set_msr_with_filter, .get_msr_with_filter = emulator_get_msr_with_filter, - .set_msr = emulator_set_msr, .get_msr = emulator_get_msr, .check_pmc = emulator_check_pmc, .read_pmc = emulator_read_pmc, @@ -8254,7 +8217,6 @@ static const struct x86_emulate_ops emulate_ops = { .guest_has_rdpid = emulator_guest_has_rdpid, .set_nmi_mask = emulator_set_nmi_mask, .get_hflags = emulator_get_hflags, - .exiting_smm = emulator_exiting_smm, .leave_smm = emulator_leave_smm, .triple_fault = emulator_triple_fault, .set_xcr = emulator_set_xcr, @@ -8327,8 +8289,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) cs_db ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK); - BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK); - BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK); ctxt->interruptibility = 0; ctxt->have_exception = false; @@ -8587,29 +8547,6 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt, static int complete_emulated_mmio(struct kvm_vcpu *vcpu); static int complete_emulated_pio(struct kvm_vcpu *vcpu); -static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) -{ - trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm); - - if (entering_smm) { - vcpu->arch.hflags |= HF_SMM_MASK; - } else { - vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK); - - /* Process a latched INIT or SMI, if any. */ - kvm_make_request(KVM_REQ_EVENT, vcpu); - - /* - * Even if KVM_SET_SREGS2 loaded PDPTRs out of band, - * on SMM exit we still need to reload them from - * guest memory - */ - vcpu->arch.pdptrs_from_userspace = false; - } - - kvm_mmu_reset_context(vcpu); -} - static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7, unsigned long *db) { @@ -8841,7 +8778,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, write_fault_to_spt, emulation_type)) return 1; - if (ctxt->have_exception) { + + if (ctxt->have_exception && + !(emulation_type & EMULTYPE_SKIP)) { /* * #UD should result in just EMULATION_FAILED, and trap-like * exception should not be encountered during decode. @@ -9105,9 +9044,11 @@ static void tsc_khz_changed(void *data) struct cpufreq_freqs *freq = data; unsigned long khz = 0; + WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)); + if (data) khz = freq->new; - else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + else khz = cpufreq_quick_get(raw_smp_processor_id()); if (!khz) khz = tsc_khz; @@ -9128,8 +9069,10 @@ static void kvm_hyperv_tsc_notifier(void) hyperv_stop_tsc_emulation(); /* TSC frequency always matches when on Hyper-V */ - for_each_present_cpu(cpu) - per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { + for_each_present_cpu(cpu) + per_cpu(cpu_tsc_khz, cpu) = tsc_khz; + } kvm_caps.max_guest_tsc_khz = tsc_khz; list_for_each_entry(kvm, &vm_list, vm_list) { @@ -9266,10 +9209,10 @@ static void kvm_timer_init(void) } cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - } - cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", - kvmclock_cpu_online, kvmclock_cpu_down_prep); + cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online", + kvmclock_cpu_online, kvmclock_cpu_down_prep); + } } #ifdef CONFIG_X86_64 @@ -9429,10 +9372,11 @@ void kvm_arch_exit(void) #endif kvm_lapic_exit(); - if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) { cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); - cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); + cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE); + } #ifdef CONFIG_X86_64 pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier); irq_work_sync(&pvclock_irq_work); @@ -9999,6 +9943,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, * in order to make progress and get back here for another iteration. * The kvm_x86_ops hooks communicate this by returning -EBUSY. */ +#ifdef CONFIG_KVM_SMM if (vcpu->arch.smi_pending) { r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY; if (r < 0) @@ -10011,6 +9956,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu, } else static_call(kvm_x86_enable_smi_window)(vcpu); } +#endif if (vcpu->arch.nmi_pending) { r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY; @@ -10086,246 +10032,6 @@ static void process_nmi(struct kvm_vcpu *vcpu) kvm_make_request(KVM_REQ_EVENT, vcpu); } -static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) -{ - u32 flags = 0; - flags |= seg->g << 23; - flags |= seg->db << 22; - flags |= seg->l << 21; - flags |= seg->avl << 20; - flags |= seg->present << 15; - flags |= seg->dpl << 13; - flags |= seg->s << 12; - flags |= seg->type << 8; - return flags; -} - -static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) -{ - struct kvm_segment seg; - int offset; - - kvm_get_segment(vcpu, &seg, n); - put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector); - - if (n < 3) - offset = 0x7f84 + n * 12; - else - offset = 0x7f2c + (n - 3) * 12; - - put_smstate(u32, buf, offset + 8, seg.base); - put_smstate(u32, buf, offset + 4, seg.limit); - put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg)); -} - -#ifdef CONFIG_X86_64 -static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) -{ - struct kvm_segment seg; - int offset; - u16 flags; - - kvm_get_segment(vcpu, &seg, n); - offset = 0x7e00 + n * 16; - - flags = enter_smm_get_segment_flags(&seg) >> 8; - put_smstate(u16, buf, offset, seg.selector); - put_smstate(u16, buf, offset + 2, flags); - put_smstate(u32, buf, offset + 4, seg.limit); - put_smstate(u64, buf, offset + 8, seg.base); -} -#endif - -static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) -{ - struct desc_ptr dt; - struct kvm_segment seg; - unsigned long val; - int i; - - put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); - put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); - put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); - put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); - - for (i = 0; i < 8; i++) - put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); - - kvm_get_dr(vcpu, 6, &val); - put_smstate(u32, buf, 0x7fcc, (u32)val); - kvm_get_dr(vcpu, 7, &val); - put_smstate(u32, buf, 0x7fc8, (u32)val); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - put_smstate(u32, buf, 0x7fc4, seg.selector); - put_smstate(u32, buf, 0x7f64, seg.base); - put_smstate(u32, buf, 0x7f60, seg.limit); - put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - put_smstate(u32, buf, 0x7fc0, seg.selector); - put_smstate(u32, buf, 0x7f80, seg.base); - put_smstate(u32, buf, 0x7f7c, seg.limit); - put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); - - static_call(kvm_x86_get_gdt)(vcpu, &dt); - put_smstate(u32, buf, 0x7f74, dt.address); - put_smstate(u32, buf, 0x7f70, dt.size); - - static_call(kvm_x86_get_idt)(vcpu, &dt); - put_smstate(u32, buf, 0x7f58, dt.address); - put_smstate(u32, buf, 0x7f54, dt.size); - - for (i = 0; i < 6; i++) - enter_smm_save_seg_32(vcpu, buf, i); - - put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); - - /* revision id */ - put_smstate(u32, buf, 0x7efc, 0x00020000); - put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase); -} - -#ifdef CONFIG_X86_64 -static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) -{ - struct desc_ptr dt; - struct kvm_segment seg; - unsigned long val; - int i; - - for (i = 0; i < 16; i++) - put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); - - put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu)); - put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); - - kvm_get_dr(vcpu, 6, &val); - put_smstate(u64, buf, 0x7f68, val); - kvm_get_dr(vcpu, 7, &val); - put_smstate(u64, buf, 0x7f60, val); - - put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); - put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); - put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); - - put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase); - - /* revision id */ - put_smstate(u32, buf, 0x7efc, 0x00020064); - - put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); - put_smstate(u16, buf, 0x7e90, seg.selector); - put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); - put_smstate(u32, buf, 0x7e94, seg.limit); - put_smstate(u64, buf, 0x7e98, seg.base); - - static_call(kvm_x86_get_idt)(vcpu, &dt); - put_smstate(u32, buf, 0x7e84, dt.size); - put_smstate(u64, buf, 0x7e88, dt.address); - - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); - put_smstate(u16, buf, 0x7e70, seg.selector); - put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); - put_smstate(u32, buf, 0x7e74, seg.limit); - put_smstate(u64, buf, 0x7e78, seg.base); - - static_call(kvm_x86_get_gdt)(vcpu, &dt); - put_smstate(u32, buf, 0x7e64, dt.size); - put_smstate(u64, buf, 0x7e68, dt.address); - - for (i = 0; i < 6; i++) - enter_smm_save_seg_64(vcpu, buf, i); -} -#endif - -static void enter_smm(struct kvm_vcpu *vcpu) -{ - struct kvm_segment cs, ds; - struct desc_ptr dt; - unsigned long cr0; - char buf[512]; - - memset(buf, 0, 512); -#ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - enter_smm_save_state_64(vcpu, buf); - else -#endif - enter_smm_save_state_32(vcpu, buf); - - /* - * Give enter_smm() a chance to make ISA-specific changes to the vCPU - * state (e.g. leave guest mode) after we've saved the state into the - * SMM state-save area. - */ - static_call(kvm_x86_enter_smm)(vcpu, buf); - - kvm_smm_changed(vcpu, true); - kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); - - if (static_call(kvm_x86_get_nmi_mask)(vcpu)) - vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; - else - static_call(kvm_x86_set_nmi_mask)(vcpu, true); - - kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); - kvm_rip_write(vcpu, 0x8000); - - cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); - static_call(kvm_x86_set_cr0)(vcpu, cr0); - vcpu->arch.cr0 = cr0; - - static_call(kvm_x86_set_cr4)(vcpu, 0); - - /* Undocumented: IDT limit is set to zero on entry to SMM. */ - dt.address = dt.size = 0; - static_call(kvm_x86_set_idt)(vcpu, &dt); - - kvm_set_dr(vcpu, 7, DR7_FIXED_1); - - cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; - cs.base = vcpu->arch.smbase; - - ds.selector = 0; - ds.base = 0; - - cs.limit = ds.limit = 0xffffffff; - cs.type = ds.type = 0x3; - cs.dpl = ds.dpl = 0; - cs.db = ds.db = 0; - cs.s = ds.s = 1; - cs.l = ds.l = 0; - cs.g = ds.g = 1; - cs.avl = ds.avl = 0; - cs.present = ds.present = 1; - cs.unusable = ds.unusable = 0; - cs.padding = ds.padding = 0; - - kvm_set_segment(vcpu, &cs, VCPU_SREG_CS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_DS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_ES); - kvm_set_segment(vcpu, &ds, VCPU_SREG_FS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_GS); - kvm_set_segment(vcpu, &ds, VCPU_SREG_SS); - -#ifdef CONFIG_X86_64 - if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) - static_call(kvm_x86_set_efer)(vcpu, 0); -#endif - - kvm_update_cpuid_runtime(vcpu); - kvm_mmu_reset_context(vcpu); -} - -static void process_smi(struct kvm_vcpu *vcpu) -{ - vcpu->arch.smi_pending = true; - kvm_make_request(KVM_REQ_EVENT, vcpu); -} - void kvm_make_scan_ioapic_request_mask(struct kvm *kvm, unsigned long *vcpu_bitmap) { @@ -10516,20 +10222,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; - /* Forbid vmenter if vcpu dirty ring is soft-full */ - if (unlikely(vcpu->kvm->dirty_ring_size && - kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) { - vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL; - trace_kvm_dirty_ring_exit(vcpu); - r = 0; - goto out; - } - if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) { r = -EIO; goto out; } + + if (kvm_dirty_ring_check_request(vcpu)) { + r = 0; + goto out; + } + if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) { if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) { r = 0; @@ -10553,14 +10256,27 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu)) kvm_mmu_load_pgd(vcpu); - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + + /* + * Note, the order matters here, as flushing "all" TLB entries + * also flushes the "current" TLB entries, i.e. servicing the + * flush "all" will clear any request to flush "current". + */ + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) kvm_vcpu_flush_tlb_all(vcpu); - /* Flushing all ASIDs flushes the current ASID... */ - kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); - } kvm_service_local_tlb_flush_requests(vcpu); + /* + * Fall back to a "full" guest flush if Hyper-V's precise + * flushing fails. Note, Hyper-V's flushing is per-vCPU, but + * the flushes are considered "remote" and not "local" because + * the requests can be initiated from other vCPUs. + */ + if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu) && + kvm_hv_vcpu_flush_tlb(vcpu)) + kvm_vcpu_flush_tlb_guest(vcpu); + if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; @@ -10585,8 +10301,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) record_steal_time(vcpu); +#ifdef CONFIG_KVM_SMM if (kvm_check_request(KVM_REQ_SMI, vcpu)) process_smi(vcpu); +#endif if (kvm_check_request(KVM_REQ_NMI, vcpu)) process_nmi(vcpu); if (kvm_check_request(KVM_REQ_PMU, vcpu)) @@ -11834,7 +11552,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.regs_avail = ~0; vcpu->arch.regs_dirty = ~0; - kvm_gpc_init(&vcpu->arch.pv_time); + kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm, vcpu, KVM_HOST_USES_PFN); if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu)) vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; @@ -11900,6 +11618,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; kvm_async_pf_hash_reset(vcpu); + + vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; @@ -12334,7 +12054,6 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id; } -EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { @@ -12909,10 +12628,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) static_call(kvm_x86_nmi_allowed)(vcpu, false))) return true; +#ifdef CONFIG_KVM_SMM if (kvm_test_request(KVM_REQ_SMI, vcpu) || (vcpu->arch.smi_pending && static_call(kvm_x86_smi_allowed)(vcpu, false))) return true; +#endif if (kvm_arch_interrupt_allowed(vcpu) && (kvm_cpu_has_interrupt(vcpu) || @@ -12953,7 +12674,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) return true; if (kvm_test_request(KVM_REQ_NMI, vcpu) || +#ifdef CONFIG_KVM_SMM kvm_test_request(KVM_REQ_SMI, vcpu) || +#endif kvm_test_request(KVM_REQ_EVENT, vcpu)) return true; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 829d3134c1eb08d290aba61cf1ecf292b4ae9466..9de72586f4065e6a1db1af334710d6c2127412c5 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -27,6 +27,7 @@ struct kvm_caps { u64 supported_mce_cap; u64 supported_xcr0; u64 supported_xss; + u64 supported_perf_cap; }; void kvm_spurious_fault(void); diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c index f3098c0e386a8a6a8eadf50f993ec2a00cfe2317..d7af402402484bb008f253213a86c186224a34c1 100644 --- a/arch/x86/kvm/xen.c +++ b/arch/x86/kvm/xen.c @@ -42,13 +42,12 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn) int idx = srcu_read_lock(&kvm->srcu); if (gfn == GPA_INVALID) { - kvm_gpc_deactivate(kvm, gpc); + kvm_gpc_deactivate(gpc); goto out; } do { - ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa, - PAGE_SIZE); + ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE); if (ret) goto out; @@ -170,112 +169,45 @@ static void kvm_xen_init_timer(struct kvm_vcpu *vcpu) vcpu->arch.xen.timer.function = xen_timer_callback; } -static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) +static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic) { struct kvm_vcpu_xen *vx = &v->arch.xen; - u64 now = get_kvmclock_ns(v->kvm); - u64 delta_ns = now - vx->runstate_entry_time; - u64 run_delay = current->sched_info.run_delay; - - if (unlikely(!vx->runstate_entry_time)) - vx->current_runstate = RUNSTATE_offline; - - /* - * Time waiting for the scheduler isn't "stolen" if the - * vCPU wasn't running anyway. - */ - if (vx->current_runstate == RUNSTATE_running) { - u64 steal_ns = run_delay - vx->last_steal; - - delta_ns -= steal_ns; - - vx->runstate_times[RUNSTATE_runnable] += steal_ns; - } - vx->last_steal = run_delay; - - vx->runstate_times[vx->current_runstate] += delta_ns; - vx->current_runstate = state; - vx->runstate_entry_time = now; -} - -void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) -{ - struct kvm_vcpu_xen *vx = &v->arch.xen; - struct gfn_to_pfn_cache *gpc = &vx->runstate_cache; - uint64_t *user_times; + struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache; + struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache; + size_t user_len, user_len1, user_len2; + struct vcpu_runstate_info rs; unsigned long flags; - size_t user_len; - int *user_state; - - kvm_xen_update_runstate(v, state); - - if (!vx->runstate_cache.active) - return; - - if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) - user_len = sizeof(struct vcpu_runstate_info); - else - user_len = sizeof(struct compat_vcpu_runstate_info); - - read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - user_len)) { - read_unlock_irqrestore(&gpc->lock, flags); - - /* When invoked from kvm_sched_out() we cannot sleep */ - if (state == RUNSTATE_runnable) - return; - - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len)) - return; - - read_lock_irqsave(&gpc->lock, flags); - } + size_t times_ofs; + uint8_t *update_bit = NULL; + uint64_t entry_time; + uint64_t *rs_times; + int *rs_state; /* * The only difference between 32-bit and 64-bit versions of the - * runstate struct us the alignment of uint64_t in 32-bit, which + * runstate struct is the alignment of uint64_t in 32-bit, which * means that the 64-bit version has an additional 4 bytes of - * padding after the first field 'state'. - * - * So we use 'int __user *user_state' to point to the state field, - * and 'uint64_t __user *user_times' for runstate_entry_time. So - * the actual array of time[] in each state starts at user_times[1]. + * padding after the first field 'state'. Let's be really really + * paranoid about that, and matching it with our internal data + * structures that we memcpy into it... */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0); BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c); #ifdef CONFIG_X86_64 + /* + * The 64-bit structure has 4 bytes of padding before 'state_entry_time' + * so each subsequent field is shifted by 4, and it's 4 bytes longer. + */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4); BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) != offsetof(struct compat_vcpu_runstate_info, time) + 4); + BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4); #endif - - user_state = gpc->khva; - - if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) - user_times = gpc->khva + offsetof(struct vcpu_runstate_info, - state_entry_time); - else - user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info, - state_entry_time); - /* - * First write the updated state_entry_time at the appropriate - * location determined by 'offset'. - */ - BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != - sizeof(user_times[0])); - BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != - sizeof(user_times[0])); - - user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE; - smp_wmb(); - - /* - * Next, write the new runstate. This is in the *same* place - * for 32-bit and 64-bit guests, asserted here for paranoia. + * The state field is in the same place at the start of both structs, + * and is the same size (int) as vx->current_runstate. */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != offsetof(struct compat_vcpu_runstate_info, state)); @@ -284,34 +216,238 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state) BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) != sizeof(vx->current_runstate)); - *user_state = vx->current_runstate; + /* + * The state_entry_time field is 64 bits in both versions, and the + * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86 + * is little-endian means that it's in the last *byte* of the word. + * That detail is important later. + */ + BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) != + sizeof(uint64_t)); + BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) != + sizeof(uint64_t)); + BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80); /* - * Write the actual runstate times immediately after the - * runstate_entry_time. + * The time array is four 64-bit quantities in both versions, matching + * the vx->runstate_times and immediately following state_entry_time. */ BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) != - offsetof(struct vcpu_runstate_info, time) - sizeof(u64)); + offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) != - offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64)); + offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof_field(struct compat_vcpu_runstate_info, time)); BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) != sizeof(vx->runstate_times)); - memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); - smp_wmb(); + if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) { + user_len = sizeof(struct vcpu_runstate_info); + times_ofs = offsetof(struct vcpu_runstate_info, + state_entry_time); + } else { + user_len = sizeof(struct compat_vcpu_runstate_info); + times_ofs = offsetof(struct compat_vcpu_runstate_info, + state_entry_time); + } + + /* + * There are basically no alignment constraints. The guest can set it + * up so it crosses from one page to the next, and at arbitrary byte + * alignment (and the 32-bit ABI doesn't align the 64-bit integers + * anyway, even if the overall struct had been 64-bit aligned). + */ + if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) { + user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK); + user_len2 = user_len - user_len1; + } else { + user_len1 = user_len; + user_len2 = 0; + } + BUG_ON(user_len1 + user_len2 != user_len); + + retry: + /* + * Attempt to obtain the GPC lock on *both* (if there are two) + * gfn_to_pfn caches that cover the region. + */ + read_lock_irqsave(&gpc1->lock, flags); + while (!kvm_gpc_check(gpc1, user_len1)) { + read_unlock_irqrestore(&gpc1->lock, flags); + + /* When invoked from kvm_sched_out() we cannot sleep */ + if (atomic) + return; + + if (kvm_gpc_refresh(gpc1, user_len1)) + return; + + read_lock_irqsave(&gpc1->lock, flags); + } + + if (likely(!user_len2)) { + /* + * Set up three pointers directly to the runstate_info + * struct in the guest (via the GPC). + * + * • @rs_state → state field + * • @rs_times → state_entry_time field. + * • @update_bit → last byte of state_entry_time, which + * contains the XEN_RUNSTATE_UPDATE bit. + */ + rs_state = gpc1->khva; + rs_times = gpc1->khva + times_ofs; + if (v->kvm->arch.xen.runstate_update_flag) + update_bit = ((void *)(&rs_times[1])) - 1; + } else { + /* + * The guest's runstate_info is split across two pages and we + * need to hold and validate both GPCs simultaneously. We can + * declare a lock ordering GPC1 > GPC2 because nothing else + * takes them more than one at a time. + */ + read_lock(&gpc2->lock); + + if (!kvm_gpc_check(gpc2, user_len2)) { + read_unlock(&gpc2->lock); + read_unlock_irqrestore(&gpc1->lock, flags); + + /* When invoked from kvm_sched_out() we cannot sleep */ + if (atomic) + return; + + /* + * Use kvm_gpc_activate() here because if the runstate + * area was configured in 32-bit mode and only extends + * to the second page now because the guest changed to + * 64-bit mode, the second GPC won't have been set up. + */ + if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1, + user_len2)) + return; + + /* + * We dropped the lock on GPC1 so we have to go all the + * way back and revalidate that too. + */ + goto retry; + } + + /* + * In this case, the runstate_info struct will be assembled on + * the kernel stack (compat or not as appropriate) and will + * be copied to GPC1/GPC2 with a dual memcpy. Set up the three + * rs pointers accordingly. + */ + rs_times = &rs.state_entry_time; + + /* + * The rs_state pointer points to the start of what we'll + * copy to the guest, which in the case of a compat guest + * is the 32-bit field that the compiler thinks is padding. + */ + rs_state = ((void *)rs_times) - times_ofs; + + /* + * The update_bit is still directly in the guest memory, + * via one GPC or the other. + */ + if (v->kvm->arch.xen.runstate_update_flag) { + if (user_len1 >= times_ofs + sizeof(uint64_t)) + update_bit = gpc1->khva + times_ofs + + sizeof(uint64_t) - 1; + else + update_bit = gpc2->khva + times_ofs + + sizeof(uint64_t) - 1 - user_len1; + } + +#ifdef CONFIG_X86_64 + /* + * Don't leak kernel memory through the padding in the 64-bit + * version of the struct. + */ + memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time)); +#endif + } + + /* + * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the + * state_entry_time field, directly in the guest. We need to set + * that (and write-barrier) before writing to the rest of the + * structure, and clear it last. Just as Xen does, we address the + * single *byte* in which it resides because it might be in a + * different cache line to the rest of the 64-bit word, due to + * the (lack of) alignment constraints. + */ + entry_time = vx->runstate_entry_time; + if (update_bit) { + entry_time |= XEN_RUNSTATE_UPDATE; + *update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56; + smp_wmb(); + } /* - * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's - * runstate_entry_time field. + * Now assemble the actual structure, either on our kernel stack + * or directly in the guest according to how the rs_state and + * rs_times pointers were set up above. */ - user_times[0] &= ~XEN_RUNSTATE_UPDATE; + *rs_state = vx->current_runstate; + rs_times[0] = entry_time; + memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times)); + + /* For the split case, we have to then copy it to the guest. */ + if (user_len2) { + memcpy(gpc1->khva, rs_state, user_len1); + memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2); + } smp_wmb(); - read_unlock_irqrestore(&gpc->lock, flags); + /* Finally, clear the XEN_RUNSTATE_UPDATE bit. */ + if (update_bit) { + entry_time &= ~XEN_RUNSTATE_UPDATE; + *update_bit = entry_time >> 56; + smp_wmb(); + } - mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT); + if (user_len2) + read_unlock(&gpc2->lock); + + read_unlock_irqrestore(&gpc1->lock, flags); + + mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT); + if (user_len2) + mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT); +} + +void kvm_xen_update_runstate(struct kvm_vcpu *v, int state) +{ + struct kvm_vcpu_xen *vx = &v->arch.xen; + u64 now = get_kvmclock_ns(v->kvm); + u64 delta_ns = now - vx->runstate_entry_time; + u64 run_delay = current->sched_info.run_delay; + + if (unlikely(!vx->runstate_entry_time)) + vx->current_runstate = RUNSTATE_offline; + + /* + * Time waiting for the scheduler isn't "stolen" if the + * vCPU wasn't running anyway. + */ + if (vx->current_runstate == RUNSTATE_running) { + u64 steal_ns = run_delay - vx->last_steal; + + delta_ns -= steal_ns; + + vx->runstate_times[RUNSTATE_runnable] += steal_ns; + } + vx->last_steal = run_delay; + + vx->runstate_times[vx->current_runstate] += delta_ns; + vx->current_runstate = state; + vx->runstate_entry_time = now; + + if (vx->runstate_cache.active) + kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable); } static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v) @@ -352,12 +488,10 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v) * little more honest about it. */ read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) + if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) return; read_lock_irqsave(&gpc->lock, flags); @@ -417,8 +551,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending)); read_lock_irqsave(&gpc->lock, flags); - while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { read_unlock_irqrestore(&gpc->lock, flags); /* @@ -432,8 +565,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v) if (in_atomic() || !task_is_running(current)) return 1; - if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, - sizeof(struct vcpu_info))) { + if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) { /* * If this failed, userspace has screwed up the * vcpu_info mapping. No interrupts for you. @@ -493,6 +625,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = 0; break; + case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + mutex_lock(&kvm->lock); + kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag; + mutex_unlock(&kvm->lock); + r = 0; + break; + default: break; } @@ -530,6 +673,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data) r = 0; break; + case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG: + if (!sched_info_on()) { + r = -EOPNOTSUPP; + break; + } + data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag; + r = 0; + break; + default: break; } @@ -554,15 +706,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) offsetof(struct compat_vcpu_info, time)); if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); r = 0; break; } - r = kvm_gpc_activate(vcpu->kvm, - &vcpu->arch.xen.vcpu_info_cache, NULL, - KVM_HOST_USES_PFN, data->u.gpa, - sizeof(struct vcpu_info)); + r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache, + data->u.gpa, sizeof(struct vcpu_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); @@ -570,37 +720,65 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO: if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); r = 0; break; } - r = kvm_gpc_activate(vcpu->kvm, - &vcpu->arch.xen.vcpu_time_info_cache, - NULL, KVM_HOST_USES_PFN, data->u.gpa, + r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache, + data->u.gpa, sizeof(struct pvclock_vcpu_time_info)); if (!r) kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); break; - case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: + case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: { + size_t sz, sz1, sz2; + if (!sched_info_on()) { r = -EOPNOTSUPP; break; } if (data->u.gpa == GPA_INVALID) { - kvm_gpc_deactivate(vcpu->kvm, - &vcpu->arch.xen.runstate_cache); r = 0; + deactivate_out: + kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); break; } - r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache, - NULL, KVM_HOST_USES_PFN, data->u.gpa, - sizeof(struct vcpu_runstate_info)); - break; + /* + * If the guest switches to 64-bit mode after setting the runstate + * address, that's actually OK. kvm_xen_update_runstate_guest() + * will cope. + */ + if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode) + sz = sizeof(struct vcpu_runstate_info); + else + sz = sizeof(struct compat_vcpu_runstate_info); + + /* How much fits in the (first) page? */ + sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK); + r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache, + data->u.gpa, sz1); + if (r) + goto deactivate_out; + + /* Either map the second page, or deactivate the second GPC */ + if (sz1 >= sz) { + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); + } else { + sz2 = sz - sz1; + BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK); + r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache, + data->u.gpa + sz1, sz2); + if (r) + goto deactivate_out; + } + kvm_xen_update_runstate_guest(vcpu, false); + break; + } case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT: if (!sched_info_on()) { r = -EOPNOTSUPP; @@ -693,6 +871,8 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data) if (data->u.runstate.state <= RUNSTATE_offline) kvm_xen_update_runstate(vcpu, data->u.runstate.state); + else if (vcpu->arch.xen.runstate_cache.active) + kvm_xen_update_runstate_guest(vcpu, false); r = 0; break; @@ -972,9 +1152,9 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, bool ret = true; int idx, i; - read_lock_irqsave(&gpc->lock, flags); idx = srcu_read_lock(&kvm->srcu); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + read_lock_irqsave(&gpc->lock, flags); + if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; ret = false; @@ -994,8 +1174,8 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports, } out_rcu: - srcu_read_unlock(&kvm->srcu, idx); read_unlock_irqrestore(&gpc->lock, flags); + srcu_read_unlock(&kvm->srcu, idx); return ret; } @@ -1008,20 +1188,45 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode, evtchn_port_t port, *ports; gpa_t gpa; - if (!longmode || !lapic_in_kernel(vcpu) || + if (!lapic_in_kernel(vcpu) || !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND)) return false; idx = srcu_read_lock(&vcpu->kvm->srcu); gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL); srcu_read_unlock(&vcpu->kvm->srcu, idx); - - if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, - sizeof(sched_poll))) { + if (!gpa) { *r = -EFAULT; return true; } + if (IS_ENABLED(CONFIG_64BIT) && !longmode) { + struct compat_sched_poll sp32; + + /* Sanity check that the compat struct definition is correct */ + BUILD_BUG_ON(sizeof(sp32) != 16); + + if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) { + *r = -EFAULT; + return true; + } + + /* + * This is a 32-bit pointer to an array of evtchn_port_t which + * are uint32_t, so once it's converted no further compat + * handling is needed. + */ + sched_poll.ports = (void *)(unsigned long)(sp32.ports); + sched_poll.nr_ports = sp32.nr_ports; + sched_poll.timeout = sp32.timeout; + } else { + if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll, + sizeof(sched_poll))) { + *r = -EFAULT; + return true; + } + } + if (unlikely(sched_poll.nr_ports > 1)) { /* Xen (unofficially) limits number of pollers to 128 */ if (sched_poll.nr_ports > 128) { @@ -1256,7 +1461,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu) } #endif cpl = static_call(kvm_x86_get_cpl)(vcpu); - trace_kvm_xen_hypercall(input, params[0], params[1], params[2], + trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2], params[3], params[4], params[5]); /* @@ -1371,7 +1576,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) idx = srcu_read_lock(&kvm->srcu); read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE)) + if (!kvm_gpc_check(gpc, PAGE_SIZE)) goto out_rcu; if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) { @@ -1405,7 +1610,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm) gpc = &vcpu->arch.xen.vcpu_info_cache; read_lock_irqsave(&gpc->lock, flags); - if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) { + if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) { /* * Could not access the vcpu_info. Set the bit in-kernel * and prod the vCPU to deliver it for itself. @@ -1503,7 +1708,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm) break; idx = srcu_read_lock(&kvm->srcu); - rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE); + rc = kvm_gpc_refresh(gpc, PAGE_SIZE); srcu_read_unlock(&kvm->srcu, idx); } while(!rc); @@ -1833,9 +2038,14 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu) timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0); - kvm_gpc_init(&vcpu->arch.xen.runstate_cache); - kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache); - kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); + kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL, + KVM_HOST_USES_PFN); } void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) @@ -1843,9 +2053,10 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) if (kvm_xen_timer_enabled(vcpu)) kvm_xen_stop_timer(vcpu); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache); - kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache); + kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache); del_timer_sync(&vcpu->arch.xen.poll_timer); } @@ -1853,7 +2064,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu) void kvm_xen_init_vm(struct kvm *kvm) { idr_init(&kvm->arch.xen.evtchn_ports); - kvm_gpc_init(&kvm->arch.xen.shinfo_cache); + kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN); } void kvm_xen_destroy_vm(struct kvm *kvm) @@ -1861,7 +2072,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm) struct evtchnfd *evtchnfd; int i; - kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache); + kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache); idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) { if (!evtchnfd->deliver.port.port) diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h index 532a535a9e99f43c4cfadc1fbbb1192630225bf8..ea33d80a0c51f811278541b428ba93ccb203a718 100644 --- a/arch/x86/kvm/xen.h +++ b/arch/x86/kvm/xen.h @@ -143,11 +143,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu); #include #include -void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state); +void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state); static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu) { - kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running); + kvm_xen_update_runstate(vcpu, RUNSTATE_running); } static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) @@ -162,7 +162,7 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu) if (WARN_ON_ONCE(!vcpu->preempted)) return; - kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable); + kvm_xen_update_runstate(vcpu, RUNSTATE_runnable); } /* 32-bit compatibility definitions, also used natively in 32-bit build */ @@ -207,4 +207,11 @@ struct compat_vcpu_runstate_info { uint64_t time[4]; } __attribute__((packed)); +struct compat_sched_poll { + /* This is actually a guest virtual address which points to ports. */ + uint32_t ports; + unsigned int nr_ports; + uint64_t timeout; +}; + #endif /* __ARCH_X86_KVM_XEN_H__ */ diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c index 1e3de0769b8104198a6cc69c0bf8a4b10aea06c6..b5a6d83106bc2d1789d55910add79cd1a8c90236 100644 --- a/arch/x86/lib/error-inject.c +++ b/arch/x86/lib/error-inject.c @@ -11,6 +11,7 @@ asm( ".text\n" ".type just_return_func, @function\n" ".globl just_return_func\n" + ASM_FUNC_ALIGN "just_return_func:\n" ANNOTATE_NOENDBR ASM_RET diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S index b7dfd60243b75c65f79e39a1486163a5d7f21ca6..32125224fccaadfb45863679bca5c5e7cba85009 100644 --- a/arch/x86/lib/putuser.S +++ b/arch/x86/lib/putuser.S @@ -47,8 +47,6 @@ SYM_FUNC_START(__put_user_1) LOAD_TASK_SIZE_MINUS_N(0) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) - ENDBR ASM_STAC 1: movb %al,(%_ASM_CX) xor %ecx,%ecx @@ -56,54 +54,87 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL) RET SYM_FUNC_END(__put_user_1) EXPORT_SYMBOL(__put_user_1) + +SYM_FUNC_START(__put_user_nocheck_1) + ENDBR + ASM_STAC +2: movb %al,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_1) EXPORT_SYMBOL(__put_user_nocheck_1) SYM_FUNC_START(__put_user_2) LOAD_TASK_SIZE_MINUS_N(1) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL) - ENDBR ASM_STAC -2: movw %ax,(%_ASM_CX) +3: movw %ax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC RET SYM_FUNC_END(__put_user_2) EXPORT_SYMBOL(__put_user_2) + +SYM_FUNC_START(__put_user_nocheck_2) + ENDBR + ASM_STAC +4: movw %ax,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_2) EXPORT_SYMBOL(__put_user_nocheck_2) SYM_FUNC_START(__put_user_4) LOAD_TASK_SIZE_MINUS_N(3) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL) - ENDBR ASM_STAC -3: movl %eax,(%_ASM_CX) +5: movl %eax,(%_ASM_CX) xor %ecx,%ecx ASM_CLAC RET SYM_FUNC_END(__put_user_4) EXPORT_SYMBOL(__put_user_4) + +SYM_FUNC_START(__put_user_nocheck_4) + ENDBR + ASM_STAC +6: movl %eax,(%_ASM_CX) + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_4) EXPORT_SYMBOL(__put_user_nocheck_4) SYM_FUNC_START(__put_user_8) LOAD_TASK_SIZE_MINUS_N(7) cmp %_ASM_BX,%_ASM_CX jae .Lbad_put_user -SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL) - ENDBR ASM_STAC -4: mov %_ASM_AX,(%_ASM_CX) +7: mov %_ASM_AX,(%_ASM_CX) #ifdef CONFIG_X86_32 -5: movl %edx,4(%_ASM_CX) +8: movl %edx,4(%_ASM_CX) #endif xor %ecx,%ecx ASM_CLAC RET SYM_FUNC_END(__put_user_8) EXPORT_SYMBOL(__put_user_8) + +SYM_FUNC_START(__put_user_nocheck_8) + ENDBR + ASM_STAC +9: mov %_ASM_AX,(%_ASM_CX) +#ifdef CONFIG_X86_32 +10: movl %edx,4(%_ASM_CX) +#endif + xor %ecx,%ecx + ASM_CLAC + RET +SYM_FUNC_END(__put_user_nocheck_8) EXPORT_SYMBOL(__put_user_nocheck_8) SYM_CODE_START_LOCAL(.Lbad_put_user_clac) @@ -117,6 +148,11 @@ SYM_CODE_END(.Lbad_put_user_clac) _ASM_EXTABLE_UA(2b, .Lbad_put_user_clac) _ASM_EXTABLE_UA(3b, .Lbad_put_user_clac) _ASM_EXTABLE_UA(4b, .Lbad_put_user_clac) -#ifdef CONFIG_X86_32 _ASM_EXTABLE_UA(5b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(6b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(7b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(9b, .Lbad_put_user_clac) +#ifdef CONFIG_X86_32 + _ASM_EXTABLE_UA(8b, .Lbad_put_user_clac) + _ASM_EXTABLE_UA(10b, .Lbad_put_user_clac) #endif diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 073289a55f8495cb3f175d97d847485b05eb7a8b..5f61c65322bea68c5164d01afcba0ccad805407d 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -5,24 +5,27 @@ #include #include #include +#include #include #include #include +#include #include .section .text.__x86.indirect_thunk -.macro RETPOLINE reg + +.macro POLINE reg ANNOTATE_INTRA_FUNCTION_CALL call .Ldo_rop_\@ -.Lspec_trap_\@: - UNWIND_HINT_EMPTY - pause - lfence - jmp .Lspec_trap_\@ + int3 .Ldo_rop_\@: mov %\reg, (%_ASM_SP) UNWIND_HINT_FUNC +.endm + +.macro RETPOLINE reg + POLINE \reg RET .endm @@ -52,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL) */ #define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) -#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) .align RETPOLINE_THUNK_SIZE SYM_CODE_START(__x86_indirect_thunk_array) @@ -64,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array) .align RETPOLINE_THUNK_SIZE SYM_CODE_END(__x86_indirect_thunk_array) -#define GEN(reg) EXPORT_THUNK(reg) +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) +#include +#undef GEN + +#ifdef CONFIG_CALL_DEPTH_TRACKING +.macro CALL_THUNK reg + .align RETPOLINE_THUNK_SIZE + +SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + + CALL_DEPTH_ACCOUNT + POLINE \reg + ANNOTATE_UNRET_SAFE + ret + int3 +.endm + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_call_thunk_array) + +#define GEN(reg) CALL_THUNK reg #include #undef GEN + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_call_thunk_array) + +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg) +#include +#undef GEN + +.macro JUMP_THUNK reg + .align RETPOLINE_THUNK_SIZE + +SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL) + UNWIND_HINT_EMPTY + ANNOTATE_NOENDBR + POLINE \reg + ANNOTATE_UNRET_SAFE + ret + int3 +.endm + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_START(__x86_indirect_jump_thunk_array) + +#define GEN(reg) JUMP_THUNK reg +#include +#undef GEN + + .align RETPOLINE_THUNK_SIZE +SYM_CODE_END(__x86_indirect_jump_thunk_array) + +#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg) +#include +#undef GEN +#endif /* * This function name is magical and is used by -mfunction-return=thunk-extern * for the compiler to generate JMPs to it. @@ -140,3 +197,37 @@ __EXPORT_THUNK(zen_untrain_ret) EXPORT_SYMBOL(__x86_return_thunk) #endif /* CONFIG_RETHUNK */ + +#ifdef CONFIG_CALL_DEPTH_TRACKING + + .align 64 +SYM_FUNC_START(__x86_return_skl) + ANNOTATE_NOENDBR + /* + * Keep the hotpath in a 16byte I-fetch for the non-debug + * case. + */ + CALL_THUNKS_DEBUG_INC_RETS + shlq $5, PER_CPU_VAR(pcpu_hot + X86_call_depth) + jz 1f + ANNOTATE_UNRET_SAFE + ret + int3 +1: + CALL_THUNKS_DEBUG_INC_STUFFS + .rept 16 + ANNOTATE_INTRA_FUNCTION_CALL + call 2f + int3 +2: + .endr + add $(8*16), %rsp + + CREDIT_CALL_DEPTH + + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(__x86_return_skl) + +#endif /* CONFIG_CALL_DEPTH_TRACKING */ diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 8525f2876fb409101e525bd3f43997fa66180c32..e4f499eb0f295beda06263181a0247e0d76eaac7 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -299,9 +299,6 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[]) pud_t *pud; int i; - if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */ - return; - p4d = p4d_offset(pgd, 0); pud = pud_offset(p4d, 0); @@ -434,10 +431,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm) mm->pgd = pgd; - if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) + if (sizeof(pmds) != 0 && + preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0) goto out_free_pgd; - if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) + if (sizeof(u_pmds) != 0 && + preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0) goto out_free_pmds; if (paravirt_pgd_alloc(mm) != 0) @@ -451,17 +450,22 @@ pgd_t *pgd_alloc(struct mm_struct *mm) spin_lock(&pgd_lock); pgd_ctor(mm, pgd); - pgd_prepopulate_pmd(mm, pgd, pmds); - pgd_prepopulate_user_pmd(mm, pgd, u_pmds); + if (sizeof(pmds) != 0) + pgd_prepopulate_pmd(mm, pgd, pmds); + + if (sizeof(u_pmds) != 0) + pgd_prepopulate_user_pmd(mm, pgd, u_pmds); spin_unlock(&pgd_lock); return pgd; out_free_user_pmds: - free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); + if (sizeof(u_pmds) != 0) + free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS); out_free_pmds: - free_pmds(mm, pmds, PREALLOCATED_PMDS); + if (sizeof(pmds) != 0) + free_pmds(mm, pmds, PREALLOCATED_PMDS); out_free_pgd: _pgd_free(pgd); out: diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 36ffe67ad6e55e23b34ff7ee0ca75be8b22aacb3..b808be77635ed3731fd111d4c7031891783c00a7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -340,6 +341,13 @@ static int emit_call(u8 **pprog, void *func, void *ip) return emit_patch(pprog, func, ip, 0xE8); } +static int emit_rsb_call(u8 **pprog, void *func, void *ip) +{ + OPTIMIZER_HIDE_VAR(func); + x86_call_depth_emit_accounting(pprog, func); + return emit_patch(pprog, func, ip, 0xE8); +} + static int emit_jump(u8 **pprog, void *func, void *ip) { return emit_patch(pprog, func, ip, 0xE9); @@ -417,7 +425,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) EMIT2(0xFF, 0xE0 + reg); } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { OPTIMIZER_HIDE_VAR(reg); - emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); + if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) + emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip); + else + emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); } else { EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) @@ -432,7 +443,7 @@ static void emit_return(u8 **pprog, u8 *ip) u8 *prog = *pprog; if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { - emit_jump(&prog, &__x86_return_thunk, ip); + emit_jump(&prog, x86_return_thunk, ip); } else { EMIT1(0xC3); /* ret */ if (IS_ENABLED(CONFIG_SLS)) @@ -1514,19 +1525,26 @@ st: if (is_imm8(insn->off)) break; /* call */ - case BPF_JMP | BPF_CALL: + case BPF_JMP | BPF_CALL: { + int offs; + func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ EMIT3_off32(0x48, 0x8B, 0x85, -round_up(bpf_prog->aux->stack_depth, 8) - 8); - if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7)) + if (!imm32) return -EINVAL; + offs = 7 + x86_call_depth_emit_accounting(&prog, func); } else { - if (!imm32 || emit_call(&prog, func, image + addrs[i - 1])) + if (!imm32) return -EINVAL; + offs = x86_call_depth_emit_accounting(&prog, func); } + if (emit_call(&prog, func, image + addrs[i - 1] + offs)) + return -EINVAL; break; + } case BPF_JMP | BPF_TAIL_CALL: if (imm32) @@ -1917,7 +1935,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, /* arg2: lea rsi, [rbp - ctx_cookie_off] */ EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); - if (emit_call(&prog, bpf_trampoline_enter(p), prog)) + if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog)) return -EINVAL; /* remember prog start time returned by __bpf_prog_enter */ emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); @@ -1938,7 +1956,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, (long) p->insnsi >> 32, (u32) (long) p->insnsi); /* call JITed bpf program or interpreter */ - if (emit_call(&prog, p->bpf_func, prog)) + if (emit_rsb_call(&prog, p->bpf_func, prog)) return -EINVAL; /* @@ -1962,7 +1980,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); /* arg3: lea rdx, [rbp - run_ctx_off] */ EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); - if (emit_call(&prog, bpf_trampoline_exit(p), prog)) + if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog)) return -EINVAL; *pprog = prog; @@ -2184,6 +2202,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i prog = image; EMIT_ENDBR(); + /* + * This is the direct-call trampoline, as such it needs accounting + * for the __fentry__ call. + */ + x86_call_depth_emit_accounting(&prog, NULL); EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ @@ -2210,7 +2233,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_CALL_ORIG) { /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); - if (emit_call(&prog, __bpf_tramp_enter, prog)) { + if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) { ret = -EINVAL; goto cleanup; } @@ -2242,7 +2265,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT2(0xff, 0xd0); /* call *rax */ } else { /* call original function */ - if (emit_call(&prog, orig_call, prog)) { + if (emit_rsb_call(&prog, orig_call, prog)) { ret = -EINVAL; goto cleanup; } @@ -2286,7 +2309,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i im->ip_epilogue = prog; /* arg1: mov rdi, im */ emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); - if (emit_call(&prog, __bpf_tramp_exit, prog)) { + if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) { ret = -EINVAL; goto cleanup; } diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2f82480fd430571895ce194243f37f04b8010071..ea2eb2ec90e2bc96368090bb0478a2c8f591df0e 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -1,4 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 + +#define pr_fmt(fmt) "PCI: " fmt + #include #include #include @@ -37,15 +40,15 @@ static int __init set_nouse_crs(const struct dmi_system_id *id) static int __init set_ignore_seg(const struct dmi_system_id *id) { - printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident); + pr_info("%s detected: ignoring ACPI _SEG\n", id->ident); pci_ignore_seg = true; return 0; } static int __init set_no_e820(const struct dmi_system_id *id) { - printk(KERN_INFO "PCI: %s detected: not clipping E820 regions from _CRS\n", - id->ident); + pr_info("%s detected: not clipping E820 regions from _CRS\n", + id->ident); pci_use_e820 = false; return 0; } @@ -231,10 +234,9 @@ void __init pci_acpi_crs_quirks(void) else if (pci_probe & PCI_USE__CRS) pci_use_crs = true; - printk(KERN_INFO "PCI: %s host bridge windows from ACPI; " - "if necessary, use \"pci=%s\" and report a bug\n", - pci_use_crs ? "Using" : "Ignoring", - pci_use_crs ? "nocrs" : "use_crs"); + pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n", + pci_use_crs ? "Using" : "Ignoring", + pci_use_crs ? "nocrs" : "use_crs"); /* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */ if (pci_probe & PCI_NO_E820) @@ -242,19 +244,17 @@ void __init pci_acpi_crs_quirks(void) else if (pci_probe & PCI_USE_E820) pci_use_e820 = true; - printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n", - pci_use_e820 ? "Using" : "Ignoring"); + pr_info("%s E820 reservations for host bridge windows\n", + pci_use_e820 ? "Using" : "Ignoring"); if (pci_probe & (PCI_NO_E820 | PCI_USE_E820)) - printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can this automatically\n"); + pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n"); } #ifdef CONFIG_PCI_MMCONFIG static int check_segment(u16 seg, struct device *dev, char *estr) { if (seg) { - dev_err(dev, - "%s can't access PCI configuration " - "space under this host bridge.\n", + dev_err(dev, "%s can't access configuration space under this host bridge\n", estr); return -EIO; } @@ -264,9 +264,7 @@ static int check_segment(u16 seg, struct device *dev, char *estr) * just can't access extended configuration space of * devices under this host bridge. */ - dev_warn(dev, - "%s can't access extended PCI configuration " - "space under this bridge.\n", + dev_warn(dev, "%s can't access extended configuration space under this bridge\n", estr); return 0; @@ -421,9 +419,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) root->segment = domain = 0; if (domain && !pci_domains_supported) { - printk(KERN_WARNING "pci_bus %04x:%02x: " - "ignored (multiple domains not supported)\n", - domain, busnum); + pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n", + domain, busnum); return NULL; } @@ -491,7 +488,7 @@ int __init pci_acpi_init(void) if (acpi_noirq) return -ENODEV; - printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n"); + pr_info("Using ACPI for IRQ routing\n"); acpi_irq_penalty_init(); pcibios_enable_irq = acpi_pci_irq_enable; pcibios_disable_irq = acpi_pci_irq_disable; @@ -503,7 +500,7 @@ int __init pci_acpi_init(void) * also do it here in case there are still broken drivers that * don't use pci_enable_device(). */ - printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); + pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n"); for_each_pci_dev(dev) acpi_pci_irq_enable(dev); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 7e51c14a1ef06d83d854a0c165ec605bbf3919db..55d9caf66401a445fdd27e18e49010988e6a3337 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -305,6 +305,50 @@ static void __init efi_clean_memmap(void) } } +/* + * Firmware can use EfiMemoryMappedIO to request that MMIO regions be + * mapped by the OS so they can be accessed by EFI runtime services, but + * should have no other significance to the OS (UEFI r2.10, sec 7.2). + * However, most bootloaders and EFI stubs convert EfiMemoryMappedIO + * regions to E820_TYPE_RESERVED entries, which prevent Linux from + * allocating space from them (see remove_e820_regions()). + * + * Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and + * PCI host bridge windows, which means Linux can't allocate BAR space for + * hot-added devices. + * + * Remove large EfiMemoryMappedIO regions from the E820 map to avoid this + * problem. + * + * Retain small EfiMemoryMappedIO regions because on some platforms, these + * describe non-window space that's included in host bridge _CRS. If we + * assign that space to PCI devices, they don't work. + */ +static void __init efi_remove_e820_mmio(void) +{ + efi_memory_desc_t *md; + u64 size, start, end; + int i = 0; + + for_each_efi_memory_desc(md) { + if (md->type == EFI_MEMORY_MAPPED_IO) { + size = md->num_pages << EFI_PAGE_SHIFT; + start = md->phys_addr; + end = start + size - 1; + if (size >= 256*1024) { + pr_info("Remove mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluMB) from e820 map\n", + i, start, end, size >> 20); + e820__range_remove(start, size, + E820_TYPE_RESERVED, 1); + } else { + pr_info("Not removing mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluKB) from e820 map\n", + i, start, end, size >> 10); + } + } + i++; + } +} + void __init efi_print_memmap(void) { efi_memory_desc_t *md; @@ -476,6 +520,8 @@ void __init efi_init(void) set_bit(EFI_RUNTIME_SERVICES, &efi.flags); efi_clean_memmap(); + efi_remove_e820_mmio(); + if (efi_enabled(EFI_DBG)) efi_print_memmap(); } diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c index e94e0050a583a8fe7d865093e9995a84c2fcdf32..6f955eb1e1631a04df52708ea9792bb0328d1ca8 100644 --- a/arch/x86/power/hibernate.c +++ b/arch/x86/power/hibernate.c @@ -159,7 +159,7 @@ int relocate_restore_code(void) if (!relocated_restore_code) return -ENOMEM; - memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); + __memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE); /* Make the page containing the relocated code executable */ pgd = (pgd_t *)__va(read_cr3_pa()) + diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 468e8c8cce01eff6498a39410d6f20461dcb4a3e..5b137966287709d8640e851352e5f58ea3811c9a 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -1210,7 +1210,7 @@ static void __init xen_setup_gdt(int cpu) pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot; pv_ops.cpu.load_gdt = xen_load_gdt_boot; - switch_to_new_gdt(cpu); + switch_gdt_and_percpu_base(cpu); pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry; pv_ops.cpu.load_gdt = xen_load_gdt; diff --git a/crypto/Kconfig b/crypto/Kconfig index d779667671b23f03c57b5e887627f06da775ef2f..9c86f704515761c47bbafc579827651b2810cf9b 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -175,9 +175,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS This is intended for developer use only, as these tests take much longer to run than the normal self tests. -config CRYPTO_GF128MUL - tristate - config CRYPTO_NULL tristate "Null algorithms" select CRYPTO_NULL2 @@ -714,9 +711,9 @@ config CRYPTO_KEYWRAP config CRYPTO_LRW tristate "LRW (Liskov Rivest Wagner)" + select CRYPTO_LIB_GF128MUL select CRYPTO_SKCIPHER select CRYPTO_MANAGER - select CRYPTO_GF128MUL select CRYPTO_ECB help LRW (Liskov Rivest Wagner) mode @@ -926,8 +923,8 @@ config CRYPTO_CMAC config CRYPTO_GHASH tristate "GHASH" - select CRYPTO_GF128MUL select CRYPTO_HASH + select CRYPTO_LIB_GF128MUL help GCM GHASH function (NIST SP800-38D) @@ -967,8 +964,8 @@ config CRYPTO_MICHAEL_MIC config CRYPTO_POLYVAL tristate - select CRYPTO_GF128MUL select CRYPTO_HASH + select CRYPTO_LIB_GF128MUL help POLYVAL hash function for HCTR2 diff --git a/crypto/Makefile b/crypto/Makefile index 303b21c43df05a5e4784b355150daadbd9ec8d66..d0126c915834b2f09b3ed42ab712a69b26b47258 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -85,7 +85,6 @@ obj-$(CONFIG_CRYPTO_WP512) += wp512.o CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns) # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149 obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o CFLAGS_blake2b_generic.o := -Wframe-larger-than=4096 # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930 -obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o obj-$(CONFIG_CRYPTO_ECB) += ecb.o obj-$(CONFIG_CRYPTO_CBC) += cbc.o obj-$(CONFIG_CRYPTO_CFB) += cfb.o diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c index 27ab2793181374ff142c37cdf3106a878a0eabd8..666474b81c6aa5655fd48eee511cc57203dd1410 100644 --- a/crypto/aes_generic.c +++ b/crypto/aes_generic.c @@ -48,11 +48,11 @@ */ #include +#include #include #include #include #include -#include #include #include diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c index 205c2c257d4926b327af3f806088e45dfa4fbdd9..a3b342f92fab6b910cd675dd9de8cc217bc66f5e 100644 --- a/crypto/aes_ti.c +++ b/crypto/aes_ti.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key, diff --git a/crypto/af_alg.c b/crypto/af_alg.c index e893c0f6c8799a78f09b125702ace398ae43938d..0a4fa2a429e22a60d9fc3845ba2d21b29c0fcb60 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -19,6 +21,10 @@ #include #include #include +#include +#include +#include +#include struct alg_type_list { const struct af_alg_type *type; @@ -222,6 +228,129 @@ out: return err; } +#ifdef CONFIG_KEYS + +static const u8 *key_data_ptr_user(const struct key *key, + unsigned int *datalen) +{ + const struct user_key_payload *ukp; + + ukp = user_key_payload_locked(key); + if (IS_ERR_OR_NULL(ukp)) + return ERR_PTR(-EKEYREVOKED); + + *datalen = key->datalen; + + return ukp->data; +} + +static const u8 *key_data_ptr_encrypted(const struct key *key, + unsigned int *datalen) +{ + const struct encrypted_key_payload *ekp; + + ekp = dereference_key_locked(key); + if (IS_ERR_OR_NULL(ekp)) + return ERR_PTR(-EKEYREVOKED); + + *datalen = ekp->decrypted_datalen; + + return ekp->decrypted_data; +} + +static const u8 *key_data_ptr_trusted(const struct key *key, + unsigned int *datalen) +{ + const struct trusted_key_payload *tkp; + + tkp = dereference_key_locked(key); + if (IS_ERR_OR_NULL(tkp)) + return ERR_PTR(-EKEYREVOKED); + + *datalen = tkp->key_len; + + return tkp->key; +} + +static struct key *lookup_key(key_serial_t serial) +{ + key_ref_t key_ref; + + key_ref = lookup_user_key(serial, 0, KEY_NEED_SEARCH); + if (IS_ERR(key_ref)) + return ERR_CAST(key_ref); + + return key_ref_to_ptr(key_ref); +} + +static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval, + unsigned int optlen) +{ + const struct af_alg_type *type = ask->type; + u8 *key_data = NULL; + unsigned int key_datalen; + key_serial_t serial; + struct key *key; + const u8 *ret; + int err; + + if (optlen != sizeof(serial)) + return -EINVAL; + + if (copy_from_sockptr(&serial, optval, optlen)) + return -EFAULT; + + key = lookup_key(serial); + if (IS_ERR(key)) + return PTR_ERR(key); + + down_read(&key->sem); + + ret = ERR_PTR(-ENOPROTOOPT); + if (!strcmp(key->type->name, "user") || + !strcmp(key->type->name, "logon")) { + ret = key_data_ptr_user(key, &key_datalen); + } else if (IS_REACHABLE(CONFIG_ENCRYPTED_KEYS) && + !strcmp(key->type->name, "encrypted")) { + ret = key_data_ptr_encrypted(key, &key_datalen); + } else if (IS_REACHABLE(CONFIG_TRUSTED_KEYS) && + !strcmp(key->type->name, "trusted")) { + ret = key_data_ptr_trusted(key, &key_datalen); + } + + if (IS_ERR(ret)) { + up_read(&key->sem); + return PTR_ERR(ret); + } + + key_data = sock_kmalloc(&ask->sk, key_datalen, GFP_KERNEL); + if (!key_data) { + up_read(&key->sem); + return -ENOMEM; + } + + memcpy(key_data, ret, key_datalen); + + up_read(&key->sem); + + err = type->setkey(ask->private, key_data, key_datalen); + + sock_kzfree_s(&ask->sk, key_data, key_datalen); + + return err; +} + +#else + +static inline int alg_setkey_by_key_serial(struct alg_sock *ask, + sockptr_t optval, + unsigned int optlen) +{ + return -ENOPROTOOPT; +} + +#endif + static int alg_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { @@ -242,12 +371,16 @@ static int alg_setsockopt(struct socket *sock, int level, int optname, switch (optname) { case ALG_SET_KEY: + case ALG_SET_KEY_BY_KEY_SERIAL: if (sock->state == SS_CONNECTED) goto unlock; if (!type->setkey) goto unlock; - err = alg_setkey(sk, optval, optlen); + if (optname == ALG_SET_KEY_BY_KEY_SERIAL) + err = alg_setkey_by_key_serial(ask, optval, optlen); + else + err = alg_setkey(sk, optval, optlen); break; case ALG_SET_AEAD_AUTHSIZE: if (sock->state == SS_CONNECTED) diff --git a/crypto/algapi.c b/crypto/algapi.c index 5c69ff8e8fa5c1dab176cd476f19907f45893dfe..d08f864f08beef69e787473faa03ea67998a05c0 100644 --- a/crypto/algapi.c +++ b/crypto/algapi.c @@ -222,12 +222,65 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list, } EXPORT_SYMBOL_GPL(crypto_remove_spawns); +static void crypto_alg_finish_registration(struct crypto_alg *alg, + bool fulfill_requests, + struct list_head *algs_to_put) +{ + struct crypto_alg *q; + + list_for_each_entry(q, &crypto_alg_list, cra_list) { + if (q == alg) + continue; + + if (crypto_is_moribund(q)) + continue; + + if (crypto_is_larval(q)) { + struct crypto_larval *larval = (void *)q; + + /* + * Check to see if either our generic name or + * specific name can satisfy the name requested + * by the larval entry q. + */ + if (strcmp(alg->cra_name, q->cra_name) && + strcmp(alg->cra_driver_name, q->cra_name)) + continue; + + if (larval->adult) + continue; + if ((q->cra_flags ^ alg->cra_flags) & larval->mask) + continue; + + if (fulfill_requests && crypto_mod_get(alg)) + larval->adult = alg; + else + larval->adult = ERR_PTR(-EAGAIN); + + continue; + } + + if (strcmp(alg->cra_name, q->cra_name)) + continue; + + if (strcmp(alg->cra_driver_name, q->cra_driver_name) && + q->cra_priority > alg->cra_priority) + continue; + + crypto_remove_spawns(q, algs_to_put, alg); + } + + crypto_notify(CRYPTO_MSG_ALG_LOADED, alg); +} + static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg) { struct crypto_larval *larval; - if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER)) - return NULL; + if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER) || + IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) || + (alg->cra_flags & CRYPTO_ALG_INTERNAL)) + return NULL; /* No self-test needed */ larval = crypto_larval_alloc(alg->cra_name, alg->cra_flags | CRYPTO_ALG_TESTED, 0); @@ -248,7 +301,8 @@ static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg) return larval; } -static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) +static struct crypto_larval * +__crypto_register_alg(struct crypto_alg *alg, struct list_head *algs_to_put) { struct crypto_alg *q; struct crypto_larval *larval; @@ -259,9 +313,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) INIT_LIST_HEAD(&alg->cra_users); - /* No cheating! */ - alg->cra_flags &= ~CRYPTO_ALG_TESTED; - ret = -EEXIST; list_for_each_entry(q, &crypto_alg_list, cra_list) { @@ -288,12 +339,17 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg) list_add(&alg->cra_list, &crypto_alg_list); - if (larval) + crypto_stats_init(alg); + + if (larval) { + /* No cheating! */ + alg->cra_flags &= ~CRYPTO_ALG_TESTED; + list_add(&larval->alg.cra_list, &crypto_alg_list); - else + } else { alg->cra_flags |= CRYPTO_ALG_TESTED; - - crypto_stats_init(alg); + crypto_alg_finish_registration(alg, true, algs_to_put); + } out: return larval; @@ -341,7 +397,10 @@ found: alg->cra_flags |= CRYPTO_ALG_TESTED; - /* Only satisfy larval waiters if we are the best. */ + /* + * If a higher-priority implementation of the same algorithm is + * currently being tested, then don't fulfill request larvals. + */ best = true; list_for_each_entry(q, &crypto_alg_list, cra_list) { if (crypto_is_moribund(q) || !crypto_is_larval(q)) @@ -356,47 +415,7 @@ found: } } - list_for_each_entry(q, &crypto_alg_list, cra_list) { - if (q == alg) - continue; - - if (crypto_is_moribund(q)) - continue; - - if (crypto_is_larval(q)) { - struct crypto_larval *larval = (void *)q; - - /* - * Check to see if either our generic name or - * specific name can satisfy the name requested - * by the larval entry q. - */ - if (strcmp(alg->cra_name, q->cra_name) && - strcmp(alg->cra_driver_name, q->cra_name)) - continue; - - if (larval->adult) - continue; - if ((q->cra_flags ^ alg->cra_flags) & larval->mask) - continue; - - if (best && crypto_mod_get(alg)) - larval->adult = alg; - else - larval->adult = ERR_PTR(-EAGAIN); - - continue; - } - - if (strcmp(alg->cra_name, q->cra_name)) - continue; - - if (strcmp(alg->cra_driver_name, q->cra_driver_name) && - q->cra_priority > alg->cra_priority) - continue; - - crypto_remove_spawns(q, &list, alg); - } + crypto_alg_finish_registration(alg, best, &list); complete: complete_all(&test->completion); @@ -423,7 +442,8 @@ EXPORT_SYMBOL_GPL(crypto_remove_final); int crypto_register_alg(struct crypto_alg *alg) { struct crypto_larval *larval; - bool test_started; + LIST_HEAD(algs_to_put); + bool test_started = false; int err; alg->cra_flags &= ~CRYPTO_ALG_DEAD; @@ -432,17 +452,18 @@ int crypto_register_alg(struct crypto_alg *alg) return err; down_write(&crypto_alg_sem); - larval = __crypto_register_alg(alg); - test_started = static_key_enabled(&crypto_boot_test_finished); - if (!IS_ERR_OR_NULL(larval)) + larval = __crypto_register_alg(alg, &algs_to_put); + if (!IS_ERR_OR_NULL(larval)) { + test_started = crypto_boot_test_finished(); larval->test_started = test_started; + } up_write(&crypto_alg_sem); - if (IS_ERR_OR_NULL(larval)) + if (IS_ERR(larval)) return PTR_ERR(larval); - if (test_started) crypto_wait_for_test(larval); + crypto_remove_final(&algs_to_put); return 0; } EXPORT_SYMBOL_GPL(crypto_register_alg); @@ -619,6 +640,7 @@ int crypto_register_instance(struct crypto_template *tmpl, struct crypto_larval *larval; struct crypto_spawn *spawn; u32 fips_internal = 0; + LIST_HEAD(algs_to_put); int err; err = crypto_check_alg(&inst->alg); @@ -650,7 +672,7 @@ int crypto_register_instance(struct crypto_template *tmpl, inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL); - larval = __crypto_register_alg(&inst->alg); + larval = __crypto_register_alg(&inst->alg, &algs_to_put); if (IS_ERR(larval)) goto unlock; else if (larval) @@ -662,15 +684,12 @@ int crypto_register_instance(struct crypto_template *tmpl, unlock: up_write(&crypto_alg_sem); - err = PTR_ERR(larval); - if (IS_ERR_OR_NULL(larval)) - goto err; - - crypto_wait_for_test(larval); - err = 0; - -err: - return err; + if (IS_ERR(larval)) + return PTR_ERR(larval); + if (larval) + crypto_wait_for_test(larval); + crypto_remove_final(&algs_to_put); + return 0; } EXPORT_SYMBOL_GPL(crypto_register_instance); @@ -1234,6 +1253,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_skcipher_decrypt); static void __init crypto_start_tests(void) { + if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return; + for (;;) { struct crypto_larval *larval = NULL; struct crypto_alg *q; @@ -1267,7 +1289,7 @@ static void __init crypto_start_tests(void) crypto_wait_for_test(larval); } - static_branch_enable(&crypto_boot_test_finished); + set_crypto_boot_test_finished(); } static int __init crypto_algapi_init(void) diff --git a/crypto/algboss.c b/crypto/algboss.c index eb5fe84efb83e69cba4335f896b6a8f740630de2..0de1e66979498eb3b49b3862c37879e619fa0f68 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -175,18 +175,10 @@ static int cryptomgr_test(void *data) { struct crypto_test_param *param = data; u32 type = param->type; - int err = 0; - -#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS - goto skiptest; -#endif - - if (type & CRYPTO_ALG_TESTED) - goto skiptest; + int err; err = alg_test(param->driver, param->alg, type, CRYPTO_ALG_TESTED); -skiptest: crypto_alg_tested(param->driver, err); kfree(param); @@ -197,7 +189,9 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg) { struct task_struct *thread; struct crypto_test_param *param; - u32 type; + + if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return NOTIFY_DONE; if (!try_module_get(THIS_MODULE)) goto err; @@ -208,13 +202,7 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg) memcpy(param->driver, alg->cra_driver_name, sizeof(param->driver)); memcpy(param->alg, alg->cra_name, sizeof(param->alg)); - type = alg->cra_flags; - - /* Do not test internal algorithms. */ - if (type & CRYPTO_ALG_INTERNAL) - type |= CRYPTO_ALG_TESTED; - - param->type = type; + param->type = alg->cra_flags; thread = kthread_run(cryptomgr_test, param, "cryptomgr_test"); if (IS_ERR(thread)) diff --git a/crypto/anubis.c b/crypto/anubis.c index 5da0241ef453c89557decd0f1cddefd137165047..9f0cf61bbc6e2638140045f3954a69e77498a4ea 100644 --- a/crypto/anubis.c +++ b/crypto/anubis.c @@ -29,11 +29,11 @@ * */ +#include #include #include #include #include -#include #include #define ANUBIS_MIN_KEY_SIZE 16 diff --git a/crypto/api.c b/crypto/api.c index 64f2d365a8e94d5bcd9a199e44b465da105030a4..b022702f643672ea6ad2873bb1b95b5bc828ade2 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -31,8 +31,10 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem); BLOCKING_NOTIFIER_HEAD(crypto_chain); EXPORT_SYMBOL_GPL(crypto_chain); -DEFINE_STATIC_KEY_FALSE(crypto_boot_test_finished); -EXPORT_SYMBOL_GPL(crypto_boot_test_finished); +#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS +DEFINE_STATIC_KEY_FALSE(__crypto_boot_test_finished); +EXPORT_SYMBOL_GPL(__crypto_boot_test_finished); +#endif static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg); @@ -172,9 +174,6 @@ void crypto_wait_for_test(struct crypto_larval *larval) err = wait_for_completion_killable(&larval->completion); WARN_ON(err); - if (!err) - crypto_notify(CRYPTO_MSG_ALG_LOADED, larval); - out: crypto_larval_kill(&larval->alg); } @@ -205,7 +204,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg) struct crypto_larval *larval = (void *)alg; long timeout; - if (!static_branch_likely(&crypto_boot_test_finished)) + if (!crypto_boot_test_finished()) crypto_start_test(larval); timeout = wait_for_completion_killable_timeout( diff --git a/crypto/blowfish_common.c b/crypto/blowfish_common.c index 1c072012baff4813c52555f1b36a243cdfb9e55e..c0208ce269a337015eba68122ce38fa5132e1012 100644 --- a/crypto/blowfish_common.c +++ b/crypto/blowfish_common.c @@ -14,11 +14,12 @@ * Copyright (c) Kyle McMartin * Copyright (c) 2002 James Morris */ + +#include #include #include #include #include -#include #include #include diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c index 003b52c6880eabbc1af53bb2999452a420ae7962..0e74c7242e77e409ebfa7b71d1759a5f3f10081e 100644 --- a/crypto/blowfish_generic.c +++ b/crypto/blowfish_generic.c @@ -11,11 +11,12 @@ * Copyright (c) Kyle McMartin * Copyright (c) 2002 James Morris */ + +#include #include #include #include #include -#include #include #include diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c index fd1a88af9e77fd091292e72b6c27753ca5d24331..c04670cf51acfb7fe089ccb4005c442c5f8dda6a 100644 --- a/crypto/camellia_generic.c +++ b/crypto/camellia_generic.c @@ -9,7 +9,7 @@ * https://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html */ -#include +#include #include #include #include diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c index 0257c14cefc25abe032e0d302aa8e40a05e6432c..085a1eedae03bbfed08ab4b505fb0e5d1be10d7a 100644 --- a/crypto/cast5_generic.c +++ b/crypto/cast5_generic.c @@ -14,8 +14,8 @@ #include +#include #include -#include #include #include #include diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c index 75346380aa0bc7d8b5e910b913036f2954e81565..34f1ab53e3a713ba44a8442b917e95e8f56f7edd 100644 --- a/crypto/cast6_generic.c +++ b/crypto/cast6_generic.c @@ -11,8 +11,8 @@ #include +#include #include -#include #include #include #include diff --git a/crypto/ccm.c b/crypto/ccm.c index 6b815ece51c6a930dc472fa3624e85591beb64d5..30dbae72728f73d4eaab3778cb02aceff260df20 100644 --- a/crypto/ccm.c +++ b/crypto/ccm.c @@ -218,7 +218,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain, cryptlen += ilen; } - ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen); + ahash_request_set_crypt(ahreq, plain, odata, cryptlen); err = crypto_ahash_finup(ahreq); out: return err; diff --git a/crypto/cryptd.c b/crypto/cryptd.c index 668095eca0fafb52137e33ee70a97c2695b3180b..ca3a40fc7da911816b956e9aa37d6534592c493d 100644 --- a/crypto/cryptd.c +++ b/crypto/cryptd.c @@ -68,11 +68,12 @@ struct aead_instance_ctx { struct cryptd_skcipher_ctx { refcount_t refcnt; - struct crypto_sync_skcipher *child; + struct crypto_skcipher *child; }; struct cryptd_skcipher_request_ctx { crypto_completion_t complete; + struct skcipher_request req; }; struct cryptd_hash_ctx { @@ -227,13 +228,13 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent, const u8 *key, unsigned int keylen) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent); - struct crypto_sync_skcipher *child = ctx->child; + struct crypto_skcipher *child = ctx->child; - crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); - crypto_sync_skcipher_set_flags(child, - crypto_skcipher_get_flags(parent) & - CRYPTO_TFM_REQ_MASK); - return crypto_sync_skcipher_setkey(child, key, keylen); + crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(child, + crypto_skcipher_get_flags(parent) & + CRYPTO_TFM_REQ_MASK); + return crypto_skcipher_setkey(child, key, keylen); } static void cryptd_skcipher_complete(struct skcipher_request *req, int err) @@ -258,13 +259,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base, struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_sync_skcipher *child = ctx->child; - SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); + struct skcipher_request *subreq = &rctx->req; + struct crypto_skcipher *child = ctx->child; if (unlikely(err == -EINPROGRESS)) goto out; - skcipher_request_set_sync_tfm(subreq, child); + skcipher_request_set_tfm(subreq, child); skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, @@ -286,13 +287,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base, struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req); struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct crypto_sync_skcipher *child = ctx->child; - SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child); + struct skcipher_request *subreq = &rctx->req; + struct crypto_skcipher *child = ctx->child; if (unlikely(err == -EINPROGRESS)) goto out; - skcipher_request_set_sync_tfm(subreq, child); + skcipher_request_set_tfm(subreq, child); skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL); skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen, @@ -343,9 +344,10 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm) if (IS_ERR(cipher)) return PTR_ERR(cipher); - ctx->child = (struct crypto_sync_skcipher *)cipher; + ctx->child = cipher; crypto_skcipher_set_reqsize( - tfm, sizeof(struct cryptd_skcipher_request_ctx)); + tfm, sizeof(struct cryptd_skcipher_request_ctx) + + crypto_skcipher_reqsize(cipher)); return 0; } @@ -353,7 +355,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm); - crypto_free_sync_skcipher(ctx->child); + crypto_free_skcipher(ctx->child); } static void cryptd_skcipher_free(struct skcipher_instance *inst) @@ -931,7 +933,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm) { struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base); - return &ctx->child->base; + return ctx->child; } EXPORT_SYMBOL_GPL(cryptd_skcipher_child); diff --git a/crypto/des_generic.c b/crypto/des_generic.c index c85354a5e94c7bd5befc80f754ce8297c1affc14..1274e18d3eb901500e92d09cbf232170eb1da07a 100644 --- a/crypto/des_generic.c +++ b/crypto/des_generic.c @@ -8,11 +8,11 @@ */ #include +#include #include #include #include #include -#include #include diff --git a/crypto/dh.c b/crypto/dh.c index 99c3b2ef7adca58c5043838961e7c2cce6d29d9c..e39c1bde1ac07bdd1606eec6161789f3786a8e57 100644 --- a/crypto/dh.c +++ b/crypto/dh.c @@ -318,6 +318,9 @@ static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm) if (IS_ERR(tfm_ctx->dh_tfm)) return PTR_ERR(tfm_ctx->dh_tfm); + kpp_set_reqsize(tfm, sizeof(struct kpp_request) + + crypto_kpp_reqsize(tfm_ctx->dh_tfm)); + return 0; } @@ -593,7 +596,6 @@ static int __maybe_unused __dh_safe_prime_create( inst->alg.max_size = dh_safe_prime_max_size; inst->alg.init = dh_safe_prime_init_tfm; inst->alg.exit = dh_safe_prime_exit_tfm; - inst->alg.reqsize = sizeof(struct kpp_request) + dh_alg->reqsize; inst->alg.base.cra_priority = dh_alg->base.cra_priority; inst->alg.base.cra_module = THIS_MODULE; inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx); diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c index 76a04d000c0d3fd55c1521862208c04def6ce7bd..95a16e88899bafbe6a86417a4274561e8ed2f3eb 100644 --- a/crypto/fcrypt.c +++ b/crypto/fcrypt.c @@ -43,10 +43,10 @@ */ #include +#include #include #include #include -#include #define ROUNDS 16 diff --git a/crypto/internal.h b/crypto/internal.h index c08385571853ee677fc415022980c889917191a4..932f0aafddc32dc3974e71bdb8d0f37ad9e3aa80 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -47,7 +47,25 @@ extern struct list_head crypto_alg_list; extern struct rw_semaphore crypto_alg_sem; extern struct blocking_notifier_head crypto_chain; -DECLARE_STATIC_KEY_FALSE(crypto_boot_test_finished); +#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS +static inline bool crypto_boot_test_finished(void) +{ + return true; +} +static inline void set_crypto_boot_test_finished(void) +{ +} +#else +DECLARE_STATIC_KEY_FALSE(__crypto_boot_test_finished); +static inline bool crypto_boot_test_finished(void) +{ + return static_branch_likely(&__crypto_boot_test_finished); +} +static inline void set_crypto_boot_test_finished(void) +{ + static_branch_enable(&__crypto_boot_test_finished); +} +#endif /* !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */ #ifdef CONFIG_PROC_FS void __init crypto_init_proc(void); diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c index 58edf7797abfb017a529b3a7e7f384d0d1a2d64f..c3f9938e1ad27fd838508ca5d3e93ab136e350dd 100644 --- a/crypto/kdf_sp800108.c +++ b/crypto/kdf_sp800108.c @@ -125,9 +125,13 @@ static const struct kdf_testvec kdf_ctr_hmac_sha256_tv_template[] = { static int __init crypto_kdf108_init(void) { - int ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)", - crypto_kdf108_setkey, crypto_kdf108_ctr_generate); + int ret; + if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS)) + return 0; + + ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)", + crypto_kdf108_setkey, crypto_kdf108_ctr_generate); if (ret) { if (fips_enabled) panic("alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n", @@ -136,7 +140,7 @@ static int __init crypto_kdf108_init(void) WARN(1, "alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n", ret); - } else { + } else if (fips_enabled) { pr_info("alg: self-tests for CTR-KDF (hmac(sha256)) passed\n"); } diff --git a/crypto/khazad.c b/crypto/khazad.c index f19339954c89e0aee5a08d9cde36c18fe022e1d3..70cafe73f97405ca52044f18446c2736fb4c7a8d 100644 --- a/crypto/khazad.c +++ b/crypto/khazad.c @@ -19,11 +19,11 @@ * */ +#include #include #include #include #include -#include #include #define KHAZAD_KEY_SIZE 16 diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c index e75728f87ce5620ff2aa159f14fdc820492f414a..6ee5b8a060c0674e1e9730a53466331d4158eca2 100644 --- a/crypto/rsa-pkcs1pad.c +++ b/crypto/rsa-pkcs1pad.c @@ -579,6 +579,10 @@ static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm) return PTR_ERR(child_tfm); ctx->child = child_tfm; + + akcipher_set_reqsize(tfm, sizeof(struct pkcs1pad_request) + + crypto_akcipher_reqsize(child_tfm)); + return 0; } @@ -674,7 +678,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb) inst->alg.set_pub_key = pkcs1pad_set_pub_key; inst->alg.set_priv_key = pkcs1pad_set_priv_key; inst->alg.max_size = pkcs1pad_get_max_size; - inst->alg.reqsize = sizeof(struct pkcs1pad_request) + rsa_alg->reqsize; inst->free = pkcs1pad_free; diff --git a/crypto/seed.c b/crypto/seed.c index 27720140820ef4e9f758bed25f40899ada6adabf..d0506ade2a5f87fbaf2fbc915df0dd53e33c2f5e 100644 --- a/crypto/seed.c +++ b/crypto/seed.c @@ -8,11 +8,11 @@ * Copyright (C) 2007 Korea Information Security Agency (KISA). */ +#include #include #include #include #include -#include #include #define SEED_NUM_KCONSTANTS 16 diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c index 45f98b750053e72d7bc4ca9aa069f965291a5c18..c6bca47931e21ec86a8196097f05257bb025654e 100644 --- a/crypto/serpent_generic.c +++ b/crypto/serpent_generic.c @@ -7,11 +7,11 @@ * Copyright (C) 2002 Dag Arne Osvik */ +#include #include #include #include #include -#include #include #include diff --git a/crypto/shash.c b/crypto/shash.c index 4c88e63b3350fc7f6d6e76dec62b41d4ca064cf8..868b6ba2b3b74e8ac6e644c707fb11a854767abb 100644 --- a/crypto/shash.c +++ b/crypto/shash.c @@ -18,26 +18,16 @@ #include "internal.h" +#define MAX_SHASH_ALIGNMASK 63 + static const struct crypto_type crypto_shash_type; -static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) +int shash_no_setkey(struct crypto_shash *tfm, const u8 *key, + unsigned int keylen) { return -ENOSYS; } - -/* - * Check whether an shash algorithm has a setkey function. - * - * For CFI compatibility, this must not be an inline function. This is because - * when CFI is enabled, modules won't get the same address for shash_no_setkey - * (if it were exported, which inlining would require) as the core kernel will. - */ -bool crypto_shash_alg_has_setkey(struct shash_alg *alg) -{ - return alg->setkey != shash_no_setkey; -} -EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey); +EXPORT_SYMBOL_GPL(shash_no_setkey); static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) @@ -100,7 +90,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data, * We cannot count on __aligned() working for large values: * https://patchwork.kernel.org/patch/9507697/ */ - u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2]; + u8 ubuf[MAX_SHASH_ALIGNMASK * 2]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -142,7 +132,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out) * We cannot count on __aligned() working for large values: * https://patchwork.kernel.org/patch/9507697/ */ - u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE]; + u8 ubuf[MAX_SHASH_ALIGNMASK + HASH_MAX_DIGESTSIZE]; u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1); int err; @@ -536,6 +526,9 @@ static int shash_prepare_alg(struct shash_alg *alg) alg->statesize > HASH_MAX_STATESIZE) return -EINVAL; + if (base->cra_alignmask > MAX_SHASH_ALIGNMASK) + return -EINVAL; + if ((alg->export && !alg->import) || (alg->import && !alg->export)) return -EINVAL; diff --git a/crypto/skcipher.c b/crypto/skcipher.c index 418211180ceec379570c7de99ef43d86861b7c5a..0ecab31cfe79bfd8acc54dae56986c768a308373 100644 --- a/crypto/skcipher.c +++ b/crypto/skcipher.c @@ -763,7 +763,7 @@ struct crypto_sync_skcipher *crypto_alloc_sync_skcipher( struct crypto_skcipher *tfm; /* Only sync algorithms allowed. */ - mask |= CRYPTO_ALG_ASYNC; + mask |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE; tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask); diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c index 4a6480a27fee51869331f74e64a329c00ec912ef..560eba37dc55eae6e2f9cacb5ec339a71618c0fb 100644 --- a/crypto/sm4_generic.c +++ b/crypto/sm4_generic.c @@ -7,12 +7,12 @@ * All rights reserved. */ +#include #include #include #include #include #include -#include #include #include diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index a82679b576bb4381771a0f87ec0e401ef219a68e..a0833654ce946d25320b29665ac9e23841ab06dd 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -324,7 +324,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs, crypto_req_done, &data[i].wait); } - pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo, + pr_info("testing speed of multibuffer %s (%s) %s\n", algo, get_driver_name(crypto_aead, tfm), e); i = 0; @@ -506,8 +506,8 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen) out: if (ret == 0) - printk("1 operation in %lu cycles (%d bytes)\n", - (cycles + 4) / 8, blen); + pr_cont("1 operation in %lu cycles (%d bytes)\n", + (cycles + 4) / 8, blen); return ret; } @@ -575,8 +575,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, } crypto_init_wait(&wait); - printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo, - get_driver_name(crypto_aead, tfm), e); + pr_info("testing speed of %s (%s) %s\n", algo, + get_driver_name(crypto_aead, tfm), e); req = aead_request_alloc(tfm, GFP_KERNEL); if (!req) { @@ -624,8 +624,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs, memset(iv, 0xff, iv_len); crypto_aead_clear_flags(tfm, ~0); - printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", - i, *keysize * 8, bs); + pr_info("test %u (%d bit key, %d byte blocks): ", + i, *keysize * 8, bs); memset(tvmem[0], 0xff, PAGE_SIZE); @@ -727,8 +727,8 @@ static int test_ahash_jiffies_digest(struct ahash_request *req, int blen, return ret; } - printk("%6u opers/sec, %9lu bytes/sec\n", - bcount / secs, ((long)bcount * blen) / secs); + pr_cont("%6u opers/sec, %9lu bytes/sec\n", + bcount / secs, ((long)bcount * blen) / secs); return 0; } @@ -877,8 +877,8 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs, return; } - printk(KERN_INFO "\ntesting speed of async %s (%s)\n", algo, - get_driver_name(crypto_ahash, tfm)); + pr_info("testing speed of async %s (%s)\n", algo, + get_driver_name(crypto_ahash, tfm)); if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) { pr_err("digestsize(%u) > %d\n", crypto_ahash_digestsize(tfm), @@ -1090,15 +1090,6 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, goto out_free_tfm; } - - for (i = 0; i < num_mb; ++i) - if (testmgr_alloc_buf(data[i].xbuf)) { - while (i--) - testmgr_free_buf(data[i].xbuf); - goto out_free_tfm; - } - - for (i = 0; i < num_mb; ++i) { data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!data[i].req) { @@ -1117,7 +1108,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs, crypto_init_wait(&data[i].wait); } - pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo, + pr_info("testing speed of multibuffer %s (%s) %s\n", algo, get_driver_name(crypto_skcipher, tfm), e); i = 0; @@ -1324,13 +1315,12 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs, return; } - pr_info("\ntesting speed of %s %s (%s) %s\n", async ? "async" : "sync", + pr_info("testing speed of %s %s (%s) %s\n", async ? "async" : "sync", algo, get_driver_name(crypto_skcipher, tfm), e); req = skcipher_request_alloc(tfm, GFP_KERNEL); if (!req) { - pr_err("tcrypt: skcipher: Failed to allocate request for %s\n", - algo); + pr_err("skcipher: Failed to allocate request for %s\n", algo); goto out; } @@ -1471,387 +1461,396 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) } for (i = 1; i < 200; i++) - ret += do_test(NULL, 0, 0, i, num_mb); + ret = min(ret, do_test(NULL, 0, 0, i, num_mb)); break; case 1: - ret += tcrypt_test("md5"); + ret = min(ret, tcrypt_test("md5")); break; case 2: - ret += tcrypt_test("sha1"); + ret = min(ret, tcrypt_test("sha1")); break; case 3: - ret += tcrypt_test("ecb(des)"); - ret += tcrypt_test("cbc(des)"); - ret += tcrypt_test("ctr(des)"); + ret = min(ret, tcrypt_test("ecb(des)")); + ret = min(ret, tcrypt_test("cbc(des)")); + ret = min(ret, tcrypt_test("ctr(des)")); break; case 4: - ret += tcrypt_test("ecb(des3_ede)"); - ret += tcrypt_test("cbc(des3_ede)"); - ret += tcrypt_test("ctr(des3_ede)"); + ret = min(ret, tcrypt_test("ecb(des3_ede)")); + ret = min(ret, tcrypt_test("cbc(des3_ede)")); + ret = min(ret, tcrypt_test("ctr(des3_ede)")); break; case 5: - ret += tcrypt_test("md4"); + ret = min(ret, tcrypt_test("md4")); break; case 6: - ret += tcrypt_test("sha256"); + ret = min(ret, tcrypt_test("sha256")); break; case 7: - ret += tcrypt_test("ecb(blowfish)"); - ret += tcrypt_test("cbc(blowfish)"); - ret += tcrypt_test("ctr(blowfish)"); + ret = min(ret, tcrypt_test("ecb(blowfish)")); + ret = min(ret, tcrypt_test("cbc(blowfish)")); + ret = min(ret, tcrypt_test("ctr(blowfish)")); break; case 8: - ret += tcrypt_test("ecb(twofish)"); - ret += tcrypt_test("cbc(twofish)"); - ret += tcrypt_test("ctr(twofish)"); - ret += tcrypt_test("lrw(twofish)"); - ret += tcrypt_test("xts(twofish)"); + ret = min(ret, tcrypt_test("ecb(twofish)")); + ret = min(ret, tcrypt_test("cbc(twofish)")); + ret = min(ret, tcrypt_test("ctr(twofish)")); + ret = min(ret, tcrypt_test("lrw(twofish)")); + ret = min(ret, tcrypt_test("xts(twofish)")); break; case 9: - ret += tcrypt_test("ecb(serpent)"); - ret += tcrypt_test("cbc(serpent)"); - ret += tcrypt_test("ctr(serpent)"); - ret += tcrypt_test("lrw(serpent)"); - ret += tcrypt_test("xts(serpent)"); + ret = min(ret, tcrypt_test("ecb(serpent)")); + ret = min(ret, tcrypt_test("cbc(serpent)")); + ret = min(ret, tcrypt_test("ctr(serpent)")); + ret = min(ret, tcrypt_test("lrw(serpent)")); + ret = min(ret, tcrypt_test("xts(serpent)")); break; case 10: - ret += tcrypt_test("ecb(aes)"); - ret += tcrypt_test("cbc(aes)"); - ret += tcrypt_test("lrw(aes)"); - ret += tcrypt_test("xts(aes)"); - ret += tcrypt_test("ctr(aes)"); - ret += tcrypt_test("rfc3686(ctr(aes))"); - ret += tcrypt_test("ofb(aes)"); - ret += tcrypt_test("cfb(aes)"); - ret += tcrypt_test("xctr(aes)"); + ret = min(ret, tcrypt_test("ecb(aes)")); + ret = min(ret, tcrypt_test("cbc(aes)")); + ret = min(ret, tcrypt_test("lrw(aes)")); + ret = min(ret, tcrypt_test("xts(aes)")); + ret = min(ret, tcrypt_test("ctr(aes)")); + ret = min(ret, tcrypt_test("rfc3686(ctr(aes))")); + ret = min(ret, tcrypt_test("ofb(aes)")); + ret = min(ret, tcrypt_test("cfb(aes)")); + ret = min(ret, tcrypt_test("xctr(aes)")); break; case 11: - ret += tcrypt_test("sha384"); + ret = min(ret, tcrypt_test("sha384")); break; case 12: - ret += tcrypt_test("sha512"); + ret = min(ret, tcrypt_test("sha512")); break; case 13: - ret += tcrypt_test("deflate"); + ret = min(ret, tcrypt_test("deflate")); break; case 14: - ret += tcrypt_test("ecb(cast5)"); - ret += tcrypt_test("cbc(cast5)"); - ret += tcrypt_test("ctr(cast5)"); + ret = min(ret, tcrypt_test("ecb(cast5)")); + ret = min(ret, tcrypt_test("cbc(cast5)")); + ret = min(ret, tcrypt_test("ctr(cast5)")); break; case 15: - ret += tcrypt_test("ecb(cast6)"); - ret += tcrypt_test("cbc(cast6)"); - ret += tcrypt_test("ctr(cast6)"); - ret += tcrypt_test("lrw(cast6)"); - ret += tcrypt_test("xts(cast6)"); + ret = min(ret, tcrypt_test("ecb(cast6)")); + ret = min(ret, tcrypt_test("cbc(cast6)")); + ret = min(ret, tcrypt_test("ctr(cast6)")); + ret = min(ret, tcrypt_test("lrw(cast6)")); + ret = min(ret, tcrypt_test("xts(cast6)")); break; case 16: - ret += tcrypt_test("ecb(arc4)"); + ret = min(ret, tcrypt_test("ecb(arc4)")); break; case 17: - ret += tcrypt_test("michael_mic"); + ret = min(ret, tcrypt_test("michael_mic")); break; case 18: - ret += tcrypt_test("crc32c"); + ret = min(ret, tcrypt_test("crc32c")); break; case 19: - ret += tcrypt_test("ecb(tea)"); + ret = min(ret, tcrypt_test("ecb(tea)")); break; case 20: - ret += tcrypt_test("ecb(xtea)"); + ret = min(ret, tcrypt_test("ecb(xtea)")); break; case 21: - ret += tcrypt_test("ecb(khazad)"); + ret = min(ret, tcrypt_test("ecb(khazad)")); break; case 22: - ret += tcrypt_test("wp512"); + ret = min(ret, tcrypt_test("wp512")); break; case 23: - ret += tcrypt_test("wp384"); + ret = min(ret, tcrypt_test("wp384")); break; case 24: - ret += tcrypt_test("wp256"); + ret = min(ret, tcrypt_test("wp256")); break; case 26: - ret += tcrypt_test("ecb(anubis)"); - ret += tcrypt_test("cbc(anubis)"); + ret = min(ret, tcrypt_test("ecb(anubis)")); + ret = min(ret, tcrypt_test("cbc(anubis)")); break; case 30: - ret += tcrypt_test("ecb(xeta)"); + ret = min(ret, tcrypt_test("ecb(xeta)")); break; case 31: - ret += tcrypt_test("pcbc(fcrypt)"); + ret = min(ret, tcrypt_test("pcbc(fcrypt)")); break; case 32: - ret += tcrypt_test("ecb(camellia)"); - ret += tcrypt_test("cbc(camellia)"); - ret += tcrypt_test("ctr(camellia)"); - ret += tcrypt_test("lrw(camellia)"); - ret += tcrypt_test("xts(camellia)"); + ret = min(ret, tcrypt_test("ecb(camellia)")); + ret = min(ret, tcrypt_test("cbc(camellia)")); + ret = min(ret, tcrypt_test("ctr(camellia)")); + ret = min(ret, tcrypt_test("lrw(camellia)")); + ret = min(ret, tcrypt_test("xts(camellia)")); break; case 33: - ret += tcrypt_test("sha224"); + ret = min(ret, tcrypt_test("sha224")); break; case 35: - ret += tcrypt_test("gcm(aes)"); + ret = min(ret, tcrypt_test("gcm(aes)")); break; case 36: - ret += tcrypt_test("lzo"); + ret = min(ret, tcrypt_test("lzo")); break; case 37: - ret += tcrypt_test("ccm(aes)"); + ret = min(ret, tcrypt_test("ccm(aes)")); break; case 38: - ret += tcrypt_test("cts(cbc(aes))"); + ret = min(ret, tcrypt_test("cts(cbc(aes))")); break; case 39: - ret += tcrypt_test("xxhash64"); + ret = min(ret, tcrypt_test("xxhash64")); break; case 40: - ret += tcrypt_test("rmd160"); + ret = min(ret, tcrypt_test("rmd160")); break; case 42: - ret += tcrypt_test("blake2b-512"); + ret = min(ret, tcrypt_test("blake2b-512")); break; case 43: - ret += tcrypt_test("ecb(seed)"); + ret = min(ret, tcrypt_test("ecb(seed)")); break; case 45: - ret += tcrypt_test("rfc4309(ccm(aes))"); + ret = min(ret, tcrypt_test("rfc4309(ccm(aes))")); break; case 46: - ret += tcrypt_test("ghash"); + ret = min(ret, tcrypt_test("ghash")); break; case 47: - ret += tcrypt_test("crct10dif"); + ret = min(ret, tcrypt_test("crct10dif")); break; case 48: - ret += tcrypt_test("sha3-224"); + ret = min(ret, tcrypt_test("sha3-224")); break; case 49: - ret += tcrypt_test("sha3-256"); + ret = min(ret, tcrypt_test("sha3-256")); break; case 50: - ret += tcrypt_test("sha3-384"); + ret = min(ret, tcrypt_test("sha3-384")); break; case 51: - ret += tcrypt_test("sha3-512"); + ret = min(ret, tcrypt_test("sha3-512")); break; case 52: - ret += tcrypt_test("sm3"); + ret = min(ret, tcrypt_test("sm3")); break; case 53: - ret += tcrypt_test("streebog256"); + ret = min(ret, tcrypt_test("streebog256")); break; case 54: - ret += tcrypt_test("streebog512"); + ret = min(ret, tcrypt_test("streebog512")); break; case 55: - ret += tcrypt_test("gcm(sm4)"); + ret = min(ret, tcrypt_test("gcm(sm4)")); break; case 56: - ret += tcrypt_test("ccm(sm4)"); + ret = min(ret, tcrypt_test("ccm(sm4)")); break; case 57: - ret += tcrypt_test("polyval"); + ret = min(ret, tcrypt_test("polyval")); break; case 58: - ret += tcrypt_test("gcm(aria)"); + ret = min(ret, tcrypt_test("gcm(aria)")); + break; + + case 59: + ret = min(ret, tcrypt_test("cts(cbc(sm4))")); break; case 100: - ret += tcrypt_test("hmac(md5)"); + ret = min(ret, tcrypt_test("hmac(md5)")); break; case 101: - ret += tcrypt_test("hmac(sha1)"); + ret = min(ret, tcrypt_test("hmac(sha1)")); break; case 102: - ret += tcrypt_test("hmac(sha256)"); + ret = min(ret, tcrypt_test("hmac(sha256)")); break; case 103: - ret += tcrypt_test("hmac(sha384)"); + ret = min(ret, tcrypt_test("hmac(sha384)")); break; case 104: - ret += tcrypt_test("hmac(sha512)"); + ret = min(ret, tcrypt_test("hmac(sha512)")); break; case 105: - ret += tcrypt_test("hmac(sha224)"); + ret = min(ret, tcrypt_test("hmac(sha224)")); break; case 106: - ret += tcrypt_test("xcbc(aes)"); + ret = min(ret, tcrypt_test("xcbc(aes)")); break; case 108: - ret += tcrypt_test("hmac(rmd160)"); + ret = min(ret, tcrypt_test("hmac(rmd160)")); break; case 109: - ret += tcrypt_test("vmac64(aes)"); + ret = min(ret, tcrypt_test("vmac64(aes)")); break; case 111: - ret += tcrypt_test("hmac(sha3-224)"); + ret = min(ret, tcrypt_test("hmac(sha3-224)")); break; case 112: - ret += tcrypt_test("hmac(sha3-256)"); + ret = min(ret, tcrypt_test("hmac(sha3-256)")); break; case 113: - ret += tcrypt_test("hmac(sha3-384)"); + ret = min(ret, tcrypt_test("hmac(sha3-384)")); break; case 114: - ret += tcrypt_test("hmac(sha3-512)"); + ret = min(ret, tcrypt_test("hmac(sha3-512)")); break; case 115: - ret += tcrypt_test("hmac(streebog256)"); + ret = min(ret, tcrypt_test("hmac(streebog256)")); break; case 116: - ret += tcrypt_test("hmac(streebog512)"); + ret = min(ret, tcrypt_test("hmac(streebog512)")); break; case 150: - ret += tcrypt_test("ansi_cprng"); + ret = min(ret, tcrypt_test("ansi_cprng")); break; case 151: - ret += tcrypt_test("rfc4106(gcm(aes))"); + ret = min(ret, tcrypt_test("rfc4106(gcm(aes))")); break; case 152: - ret += tcrypt_test("rfc4543(gcm(aes))"); + ret = min(ret, tcrypt_test("rfc4543(gcm(aes))")); break; case 153: - ret += tcrypt_test("cmac(aes)"); + ret = min(ret, tcrypt_test("cmac(aes)")); break; case 154: - ret += tcrypt_test("cmac(des3_ede)"); + ret = min(ret, tcrypt_test("cmac(des3_ede)")); break; case 155: - ret += tcrypt_test("authenc(hmac(sha1),cbc(aes))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(aes))")); break; case 156: - ret += tcrypt_test("authenc(hmac(md5),ecb(cipher_null))"); + ret = min(ret, tcrypt_test("authenc(hmac(md5),ecb(cipher_null))")); break; case 157: - ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))")); break; case 158: - ret += tcrypt_test("cbcmac(sm4)"); + ret = min(ret, tcrypt_test("cbcmac(sm4)")); break; case 159: - ret += tcrypt_test("cmac(sm4)"); + ret = min(ret, tcrypt_test("cmac(sm4)")); + break; + + case 160: + ret = min(ret, tcrypt_test("xcbc(sm4)")); break; case 181: - ret += tcrypt_test("authenc(hmac(sha1),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des))")); break; case 182: - ret += tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))")); break; case 183: - ret += tcrypt_test("authenc(hmac(sha224),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des))")); break; case 184: - ret += tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))")); break; case 185: - ret += tcrypt_test("authenc(hmac(sha256),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des))")); break; case 186: - ret += tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))")); break; case 187: - ret += tcrypt_test("authenc(hmac(sha384),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des))")); break; case 188: - ret += tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))")); break; case 189: - ret += tcrypt_test("authenc(hmac(sha512),cbc(des))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des))")); break; case 190: - ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))"); + ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))")); break; case 191: - ret += tcrypt_test("ecb(sm4)"); - ret += tcrypt_test("cbc(sm4)"); - ret += tcrypt_test("cfb(sm4)"); - ret += tcrypt_test("ctr(sm4)"); + ret = min(ret, tcrypt_test("ecb(sm4)")); + ret = min(ret, tcrypt_test("cbc(sm4)")); + ret = min(ret, tcrypt_test("cfb(sm4)")); + ret = min(ret, tcrypt_test("ctr(sm4)")); + ret = min(ret, tcrypt_test("xts(sm4)")); break; case 192: - ret += tcrypt_test("ecb(aria)"); - ret += tcrypt_test("cbc(aria)"); - ret += tcrypt_test("cfb(aria)"); - ret += tcrypt_test("ctr(aria)"); + ret = min(ret, tcrypt_test("ecb(aria)")); + ret = min(ret, tcrypt_test("cbc(aria)")); + ret = min(ret, tcrypt_test("cfb(aria)")); + ret = min(ret, tcrypt_test("ctr(aria)")); break; case 200: test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, @@ -2109,6 +2108,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_cipher_speed("cts(cbc(sm4))", ENCRYPT, sec, NULL, 0, + speed_template_16); + test_cipher_speed("cts(cbc(sm4))", DECRYPT, sec, NULL, 0, + speed_template_16); test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0, speed_template_16); test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0, @@ -2117,6 +2120,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_cipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_32); + test_cipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0, + speed_template_32); break; case 219: @@ -2630,6 +2637,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb) speed_template_16); test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0, speed_template_16); + test_acipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0, + speed_template_32); + test_acipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0, + speed_template_32); break; case 519: @@ -2885,7 +2896,7 @@ static int __init tcrypt_mod_init(void) err = do_test(alg, type, mask, mode, num_mb); if (err) { - printk(KERN_ERR "tcrypt: one or more tests failed!\n"); + pr_err("one or more tests failed!\n"); goto err_free_tv; } else { pr_debug("all tests passed\n"); diff --git a/crypto/tea.c b/crypto/tea.c index 02efc5d816903097d5f9ba0fcd0dce79ff4e9f50..896f863f3067ce85f129d176a65af8d61d54155e 100644 --- a/crypto/tea.c +++ b/crypto/tea.c @@ -14,11 +14,11 @@ * Copyright (c) 2004 Aaron Grothe ajgrothe@yahoo.com */ +#include #include #include #include #include -#include #include #define TEA_KEY_SIZE 16 diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 589189c9fced55e745822cee378e620437788634..4476ac97baa5e45aa1e57665070a260b49196691 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -4712,6 +4712,12 @@ static const struct alg_test_desc alg_test_descs[] = { .alg = "cts(cbc(paes))", .test = alg_test_null, .fips_allowed = 1, + }, { + .alg = "cts(cbc(sm4))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_cts_tv_template) + } }, { .alg = "curve25519", .test = alg_test_kpp, @@ -5586,6 +5592,12 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .hash = __VECS(aes_xcbc128_tv_template) } + }, { + .alg = "xcbc(sm4)", + .test = alg_test_hash, + .suite = { + .hash = __VECS(sm4_xcbc128_tv_template) + } }, { .alg = "xchacha12", .test = alg_test_skcipher, @@ -5640,6 +5652,13 @@ static const struct alg_test_desc alg_test_descs[] = { .suite = { .cipher = __VECS(serpent_xts_tv_template) } + }, { + .alg = "xts(sm4)", + .generic_driver = "xts(ecb(sm4-generic))", + .test = alg_test_skcipher, + .suite = { + .cipher = __VECS(sm4_xts_tv_template) + } }, { .alg = "xts(twofish)", .generic_driver = "xts(ecb(twofish-generic))", diff --git a/crypto/testmgr.h b/crypto/testmgr.h index d6088e26f3261ce71ec6138d8efede1aa8ad713e..f10bfb9d997353817ae25d992881d84193dd69ef 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -14882,6 +14882,353 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = { } }; +static const struct cipher_testvec sm4_cts_tv_template[] = { + /* Generated from AES-CTS test vectors */ + { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20", + .len = 17, + .ctext = "\x05\xfe\x23\xee\x17\xa2\x89\x98" + "\xbc\x97\x0a\x0b\x54\x67\xca\xd7" + "\xd6", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20", + .len = 31, + .ctext = "\x15\x46\xe4\x95\xa4\xec\xf0\xb8" + "\x49\xd6\x6a\x9d\x89\xc7\xfd\x70" + "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43", + .len = 32, + .ctext = "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3" + "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43" + "\x68\x69\x63\x6b\x65\x6e\x2c\x20" + "\x70\x6c\x65\x61\x73\x65\x2c", + .len = 47, + .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf" + "\xd3\xe1\xdc\xeb\xfa\x04\x11\x99" + "\xde\xcf\x6f\x4d\x7b\x09\x92\x7f" + "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43" + "\x68\x69\x63\x6b\x65\x6e\x2c\x20" + "\x70\x6c\x65\x61\x73\x65\x2c\x20", + .len = 48, + .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf" + "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f" + "\xbd\x99\x21\x0c\x5e\x4d\xed\x20" + "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3", + }, { + .klen = 16, + .key = "\x63\x68\x69\x63\x6b\x65\x6e\x20" + "\x74\x65\x72\x69\x79\x61\x6b\x69", + .ptext = "\x49\x20\x77\x6f\x75\x6c\x64\x20" + "\x6c\x69\x6b\x65\x20\x74\x68\x65" + "\x20\x47\x65\x6e\x65\x72\x61\x6c" + "\x20\x47\x61\x75\x27\x73\x20\x43" + "\x68\x69\x63\x6b\x65\x6e\x2c\x20" + "\x70\x6c\x65\x61\x73\x65\x2c\x20" + "\x61\x6e\x64\x20\x77\x6f\x6e\x74" + "\x6f\x6e\x20\x73\x6f\x75\x70\x2e", + .len = 64, + .ctext = "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66" + "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf" + "\x89\xc7\x99\x3f\x87\x69\x5c\xd3" + "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3" + "\x58\x19\xa4\x8f\xa9\x68\x5e\x6b" + "\x2c\x0f\x81\x60\x15\x98\x27\x4f" + "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f" + "\xbd\x99\x21\x0c\x5e\x4d\xed\x20", + } +}; + +static const struct cipher_testvec sm4_xts_tv_template[] = { + /* Generated from AES-XTS test vectors */ + { + .key = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ctext = "\xd9\xb4\x21\xf7\x31\xc8\x94\xfd" + "\xc3\x5b\x77\x29\x1f\xe4\xe3\xb0" + "\x2a\x1f\xb7\x66\x98\xd5\x9f\x0e" + "\x51\x37\x6c\x4a\xda\x5b\xc7\x5d", + .len = 32, + }, { + .key = "\x11\x11\x11\x11\x11\x11\x11\x11" + "\x11\x11\x11\x11\x11\x11\x11\x11" + "\x22\x22\x22\x22\x22\x22\x22\x22" + "\x22\x22\x22\x22\x22\x22\x22\x22", + .klen = 32, + .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44", + .ctext = "\xa7\x4d\x72\x6c\x11\x19\x6a\x32" + "\xbe\x04\xe0\x01\xff\x29\xd0\xc7" + "\x93\x2f\x9f\x3e\xc2\x9b\xfc\xb6" + "\x4d\xd1\x7f\x63\xcb\xd3\xea\x31", + .len = 32, + }, { + .key = "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8" + "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0" + "\x22\x22\x22\x22\x22\x22\x22\x22" + "\x22\x22\x22\x22\x22\x22\x22\x22", + .klen = 32, + .iv = "\x33\x33\x33\x33\x33\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44" + "\x44\x44\x44\x44\x44\x44\x44\x44", + .ctext = "\x7f\x76\x08\x8e\xff\xad\xf7\x0c" + "\x02\xea\x9f\x95\xda\x06\x28\xd3" + "\x51\xbf\xcb\x9e\xac\x05\x63\xbc" + "\xf1\x7b\x71\x0d\xab\x0a\x98\x26", + .len = 32, + }, { + .key = "\x27\x18\x28\x18\x28\x45\x90\x45" + "\x23\x53\x60\x28\x74\x71\x35\x26" + "\x31\x41\x59\x26\x53\x58\x97\x93" + "\x23\x84\x62\x64\x33\x83\x27\x95", + .klen = 32, + .iv = "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" + "\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff" + "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf" + "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf" + "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7" + "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf" + "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7" + "\xe8\xe9\xea\xeb\xec\xed\xee\xef" + "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7" + "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff", + .ctext = "\x54\xdd\x65\xb6\x32\x6f\xae\xa8" + "\xfa\xd1\xa8\x3c\x63\x61\x4a\xf3" + "\x9f\x72\x1d\x8d\xfe\x17\x7a\x30" + "\xb6\x6a\xbf\x6a\x44\x99\x80\xe1" + "\xcd\xbe\x06\xaf\xb7\x33\x36\xf3" + "\x7a\x4d\x39\xde\x96\x4a\x30\xd7" + "\xd0\x4a\x37\x99\x16\x9c\x60\x25" + "\x8f\x6b\x74\x8a\x61\x86\x1a\xa5" + "\xec\x92\xa2\xc1\x5b\x2b\x7c\x61" + "\x5a\x42\xab\xa4\x99\xbb\xd6\xb7" + "\x1d\xb9\xc7\x89\xb2\x18\x20\x89" + "\xa2\x5d\xd3\xdf\x80\x0e\xd1\x86" + "\x4d\x19\xf7\xed\x45\xfd\x17\xa9" + "\x48\x0b\x0f\xb8\x2d\x9b\x7f\xc3" + "\xed\x57\xe9\xa1\x14\x0e\xaa\x77" + "\x8d\xd2\xdd\x67\x9e\x3e\xdc\x3d" + "\xc4\xd5\x5c\x95\x0e\xbc\x53\x1d" + "\x95\x92\xf7\xc4\x63\x82\x56\xd5" + "\x65\x18\x29\x2a\x20\xaf\x98\xfd" + "\xd3\xa6\x36\x00\x35\x0a\x70\xab" + "\x5a\x40\xf4\xc2\x85\x03\x7c\xa0" + "\x1f\x25\x1f\x19\xec\xae\x03\x29" + "\xff\x77\xad\x88\xcd\x5a\x4c\xde" + "\xa2\xae\xab\xc2\x21\x48\xff\xbd" + "\x23\x9b\xd1\x05\x15\xbd\xe1\x13" + "\x1d\xec\x84\x04\xe4\x43\xdc\x76" + "\x31\x40\xd5\xf2\x2b\xf3\x3e\x0c" + "\x68\x72\xd6\xb8\x1d\x63\x0f\x6f" + "\x00\xcd\xd0\x58\xfe\x80\xf9\xcb" + "\xfb\x77\x70\x7f\x93\xce\xe2\xca" + "\x92\xb9\x15\xb8\x30\x40\x27\xc1" + "\x90\xa8\x4e\x2d\x65\xe0\x18\xcc" + "\x6a\x38\x7d\x37\x66\xac\xdb\x28" + "\x25\x32\x84\xe8\xdb\x9a\xcf\x8f" + "\x52\x28\x0d\xdc\x6d\x00\x33\xd2" + "\xcc\xaa\xa4\xf9\xae\xff\x12\x36" + "\x69\xbc\x02\x4f\xd6\x76\x8e\xdf" + "\x8b\xc1\xf8\xd6\x22\xc1\x9c\x60" + "\x9e\xf9\x7f\x60\x91\x90\xcd\x11" + "\x02\x41\xe7\xfb\x08\x4e\xd8\x94" + "\x2d\xa1\xf9\xb9\xcf\x1b\x51\x4b" + "\x61\xa3\x88\xb3\x0e\xa6\x1a\x4a" + "\x74\x5b\x38\x1e\xe7\xad\x6c\x4d" + "\xb1\x27\x54\x53\xb8\x41\x3f\x98" + "\xdf\x6e\x4a\x40\x98\x6e\xe4\xb5" + "\x9a\xf5\xdf\xae\xcd\x30\x12\x65" + "\x17\x90\x67\xa0\x0d\x7c\xa3\x5a" + "\xb9\x5a\xbd\x61\x7a\xde\xa2\x8e" + "\xc1\xc2\x6a\x97\xde\x28\xb8\xbf" + "\xe3\x01\x20\xd6\xae\xfb\xd2\x58" + "\xc5\x9e\x42\xd1\x61\xe8\x06\x5a" + "\x78\x10\x6b\xdc\xa5\xcd\x90\xfb" + "\x3a\xac\x4e\x93\x86\x6c\x8a\x7f" + "\x96\x76\x86\x0a\x79\x14\x5b\xd9" + "\x2e\x02\xe8\x19\xa9\x0b\xe0\xb9" + "\x7c\xc5\x22\xb3\x21\x06\x85\x6f" + "\xdf\x0e\x54\xd8\x8e\x46\x24\x15" + "\x5a\x2f\x1c\x14\xea\xea\xa1\x63" + "\xf8\x58\xe9\x9a\x80\x6e\x79\x1a" + "\xcd\x82\xf1\xb0\xe2\x9f\x00\x28" + "\xa4\xc3\x8e\x97\x6f\x57\x1a\x93" + "\xf4\xfd\x57\xd7\x87\xc2\x4d\xb0" + "\xe0\x1c\xa3\x04\xe5\xa5\xc4\xdd" + "\x50\xcf\x8b\xdb\xf4\x91\xe5\x7c", + .len = 512, + }, { + .key = "\x62\x49\x77\x57\x24\x70\x93\x69" + "\x99\x59\x57\x49\x66\x96\x76\x27" + "\x02\x88\x41\x97\x16\x93\x99\x37" + "\x51\x05\x82\x09\x74\x94\x45\x92", + .klen = 32, + .iv = "\xff\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .ptext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21\x22\x23\x24\x25\x26\x27" + "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" + "\x30\x31\x32\x33\x34\x35\x36\x37" + "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" + "\x40\x41\x42\x43\x44\x45\x46\x47" + "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" + "\x50\x51\x52\x53\x54\x55\x56\x57" + "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" + "\x60\x61\x62\x63\x64\x65\x66\x67" + "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" + "\x70\x71\x72\x73\x74\x75\x76\x77" + "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f" + "\x80\x81\x82\x83\x84\x85\x86\x87" + "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f" + "\x90\x91\x92\x93\x94\x95\x96\x97" + "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f" + "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7" + "\xa8\xa9\xaa\xab\xac\xad\xae\xaf" + "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7" + "\xf8\xf9\xfa\xfb\xfc", + .ctext = "\xa2\x9f\x9e\x4e\x71\xdb\x28\x3c" + "\x80\x0e\xf6\xb7\x8e\x57\x1c\xba" + "\x90\xda\x3b\x6c\x22\x00\x68\x30" + "\x1d\x63\x0d\x9e\x6a\xad\x37\x55" + "\xbc\x77\x1e\xc9\xad\x83\x30\xd5" + "\x27\xb2\x66\x77\x18\x3c\xa6\x39" + "\x9c\x0a\xaa\x1f\x02\xe1\xd5\x65" + "\x9b\x8d\xc5\x97\x3d\xc5\x04\x53" + "\x78\x00\xe3\xb0\x1a\x43\x4e\xb7" + "\xc4\x9f\x38\xc5\x7b\xa4\x70\x64" + "\x78\xe6\x32\xd9\x65\x44\xc5\x64" + "\xb8\x42\x35\x99\xff\x66\x75\xb0" + "\x22\xd3\x9b\x6e\x8d\xcf\x6a\x24" + "\xfd\x92\xb7\x1b\x04\x28\x2a\x61" + "\xdc\x96\x2a\x20\x7a\x2c\xf1\xf9" + "\x12\x15\xf0\x4d\xcf\x2b\xde\x33" + "\x41\xbc\xe7\x85\x87\x22\xb7\x16" + "\x02\x1c\xd8\xa2\x0f\x1f\xa3\xe9" + "\xd8\x45\x48\xe7\xbe\x08\x4e\x4e" + "\x23\x79\x84\xdb\x40\x76\xf5\x13" + "\x78\x92\x4a\x2f\xf9\x1b\xf2\x80" + "\x25\x74\x51\x45\x9a\x77\x78\x97" + "\xd3\xe0\xc7\xc4\x35\x67\x2a\xe6" + "\xb3\x0d\x62\x9f\x8b", + .len = 189, + }, +}; + static const struct aead_testvec sm4_gcm_tv_template[] = { { /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */ .key = "\x01\x23\x45\x67\x89\xAB\xCD\xEF" @@ -14913,6 +15260,298 @@ static const struct aead_testvec sm4_gcm_tv_template[] = { "\x83\xDE\x35\x41\xE4\xC2\xB5\x81" "\x77\xE0\x65\xA9\xBF\x7B\x62\xEC", .clen = 80, + }, { /* Generated from AES-GCM test vectors */ + .key = zeroed_string, + .klen = 16, + .ctext = "\x23\x2f\x0c\xfe\x30\x8b\x49\xea" + "\x6f\xc8\x82\x29\xb5\xdc\x85\x8d", + .clen = 16, + }, { + .key = zeroed_string, + .klen = 16, + .ptext = zeroed_string, + .plen = 16, + .ctext = "\x7d\xe2\xaa\x7f\x11\x10\x18\x82" + "\x18\x06\x3b\xe1\xbf\xeb\x6d\x89" + "\xb8\x51\xb5\xf3\x94\x93\x75\x2b" + "\xe5\x08\xf1\xbb\x44\x82\xc5\x57", + .clen = 32, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39\x1a\xaf\xd2\x55", + .plen = 64, + .ctext = "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6" + "\x76\x21\x6a\x33\x83\x10\x41\xeb" + "\x09\x58\x00\x11\x7b\xdc\x3f\x75" + "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb" + "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07" + "\x1a\xe5\x48\x3f\xed\xde\x98\x5d" + "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf" + "\xe3\x63\x36\x83\x23\xf7\x5b\x80" + "\x7d\xfe\x77\xef\x71\xb1\x5e\xc9" + "\x52\x6b\x09\xab\x84\x28\x4b\x8a", + .clen = 80, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\x6d\x6a\x8f\x94\x67\x30\x83\x08", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39", + .plen = 60, + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6" + "\x76\x21\x6a\x33\x83\x10\x41\xeb" + "\x09\x58\x00\x11\x7b\xdc\x3f\x75" + "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb" + "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07" + "\x1a\xe5\x48\x3f\xed\xde\x98\x5d" + "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf" + "\xe3\x63\x36\x83" + "\x89\xf6\xba\x35\xb8\x18\xd3\xcc" + "\x38\x6c\x05\xb3\x8a\xcb\xc9\xde", + .clen = 76, + }, { + .key = "\xfe\xff\xe9\x92\x86\x65\x73\x1c" + "\xfe\xff\xe9\x92\x86\x65\x73\x1c", + .klen = 16, + .iv = "\xca\xfe\xba\xbe\xfa\xce\xdb\xad" + "\xde\xca\xf8\x88", + .ptext = "\xd9\x31\x32\x25\xf8\x84\x06\xe5" + "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a" + "\x86\xa7\xa9\x53\x15\x34\xf7\xda" + "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72" + "\x1c\x3c\x0c\x95\x95\x68\x09\x53" + "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25" + "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57" + "\xba\x63\x7b\x39", + .plen = 60, + .assoc = "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xfe\xed\xfa\xce\xde\xad\xbe\xef" + "\xab\xad\xda\xd2", + .alen = 20, + .ctext = "\xc1\x11\x44\x51\xd9\x25\x87\x5b" + "\x0f\xd9\x06\xf3\x33\x44\xbb\x87" + "\x8b\xa3\x77\xd2\x0c\x60\xfa\xcc" + "\x85\x50\x6f\x96\x0c\x54\x54\xc1" + "\x58\x04\x88\x6e\xf4\x26\x35\x7e" + "\x94\x80\x48\x6c\xf2\xf4\x88\x1f" + "\x19\x63\xea\xae\xba\x81\x1a\x5d" + "\x0e\x6f\x59\x08" + "\x33\xac\x5b\xa8\x19\x60\xdb\x1d" + "\xdd\x2e\x22\x2e\xe0\x87\x51\x5d", + .clen = 76, + }, { + .key = "\x8b\x32\xcf\xe7\x44\xed\x13\x59" + "\x04\x38\x77\xb0\xb9\xad\xb4\x38", + .klen = 16, + .iv = "\x00\xff\xff\xff\xff\x00\x00\xff" + "\xff\xff\x00\xff", + .ptext = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" + "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" + "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" + "\x3f\xec\x1d\x25\x3c\x51\xd2\x03" + "\xcf\x59\x74\x1f\xb2\x85\xb4\x07" + "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb" + "\xaf\x08\x44\xbd\x6f\x91\x15\xe1" + "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50" + "\x59\xa9\x97\xab\xbb\x0e\x74\x5c" + "\x00\xa4\x43\x54\x04\x54\x9b\x3b" + "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08" + "\xae\xe6\x10\x3f\x32\x65\xd1\xfc" + "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3" + "\x35\x23\xf4\x20\x41\xd4\xad\x82" + "\x8b\xa4\xad\x96\x1c\x20\x53\xbe" + "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72" + "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7" + "\xad\x49\x3a\xae\x98\xce\xa6\x66" + "\x10\x30\x90\x8c\x55\x83\xd7\x7c" + "\x8b\xe6\x53\xde\xd2\x6e\x18\x21" + "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73" + "\x57\xcc\x89\x09\x75\x9b\x78\x70" + "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5" + "\xfa\x70\x04\x70\xc6\x96\x1c\x7d" + "\x54\x41\x77\xa8\xe3\xb0\x7e\x96" + "\x82\xd9\xec\xa2\x87\x68\x55\xf9" + "\x8f\x9e\x73\x43\x47\x6a\x08\x36" + "\x93\x67\xa8\x2d\xde\xac\x41\xa9" + "\x5c\x4d\x73\x97\x0f\x70\x68\xfa" + "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9" + "\x78\x1f\x51\x07\xe3\x9a\x13\x4e" + "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7" + "\xab\x19\x37\xd9\xba\x76\x5e\xd2" + "\xf2\x53\x15\x17\x4c\x6b\x16\x9f" + "\x02\x66\x49\xca\x7c\x91\x05\xf2" + "\x45\x36\x1e\xf5\x77\xad\x1f\x46" + "\xa8\x13\xfb\x63\xb6\x08\x99\x63" + "\x82\xa2\xed\xb3\xac\xdf\x43\x19" + "\x45\xea\x78\x73\xd9\xb7\x39\x11" + "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81" + "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79" + "\xa4\x47\x7d\x80\x20\x26\xfd\x63" + "\x0a\xc7\x7e\x6d\x75\x47\xff\x76" + "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b" + "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1" + "\x54\x03\xa4\x09\x0c\x37\x7a\x15" + "\x23\x27\x5b\x8b\x4b\xa5\x64\x97" + "\xae\x4a\x50\x73\x1f\x66\x1c\x5c" + "\x03\x25\x3c\x8d\x48\x58\x71\x34" + "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5" + "\xb6\x19\x2b\x84\x2a\x20\xd1\xea" + "\x80\x6f\x96\x0e\x05\x62\xc7\x78" + "\x87\x79\x60\x38\x46\xb4\x25\x57" + "\x6e\x16\x63\xf8\xad\x6e\xd7\x42" + "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a" + "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22" + "\x86\x5c\x74\x3a\xeb\x24\x26\xc7" + "\x09\xfc\x91\x96\x47\x87\x4f\x1a" + "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24" + "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a" + "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5" + "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb" + "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe" + "\x0b\x63\xde\x87\x42\x79\x8a\x68" + "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f" + "\x9d\xd1\xc7\x45\x90\x08\xc9\x83" + "\xe9\x83\x84\xcb\x28\x69\x09\x69" + "\xce\x99\x46\x00\x54\xcb\xd8\x38" + "\xf9\x53\x4a\xbf\x31\xce\x57\x15" + "\x33\xfa\x96\x04\x33\x42\xe3\xc0" + "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6" + "\x19\x95\xd0\x0e\x82\x07\x63\xf9" + "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9" + "\xb5\x9f\x23\x28\x60\xe7\x20\x51" + "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2" + "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb" + "\x78\xc6\x91\x22\x40\x91\x80\xbe" + "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9" + "\x67\x10\xa4\x83\x98\x79\x23\xe7" + "\x92\xda\xa9\x22\x16\xb1\xe7\x78" + "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37" + "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9" + "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d" + "\x48\x11\x06\xbb\x2d\xf2\x63\x88" + "\x3f\x73\x09\xe2\x45\x56\x31\x51" + "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9" + "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66" + "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23" + "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" + "\xa4\x78\xdb\x74\x3d\x8b\xb5", + .plen = 719, + .ctext = "\xdc\xb1\x0f\x2a\xe8\x2d\x1c\x57" + "\xc4\x82\xfa\xd6\x87\xe6\x2f\x50" + "\xbd\x9e\x0a\x42\x31\xf2\xc7\xbb" + "\x21\x63\xa7\x05\x43\x33\xef\x33" + "\x5c\xd3\x47\x55\xce\x5c\xe4\xd4" + "\xe5\x07\x62\x22\xac\x01\xa8\x35" + "\x9c\x59\x34\x30\x8e\xff\x9f\xb4" + "\xd2\x4e\x74\x90\x64\xf2\x78\x5e" + "\x63\xb7\xc5\x08\x1b\x37\xa5\x9e" + "\xc0\xde\xff\xa9\x7f\x0b\xd3\x02" + "\x83\x6e\x33\xfa\x43\x11\xd3\xda" + "\x02\xcf\xcd\x4a\xc0\x78\x1f\x39" + "\x62\xcb\xa3\x95\x7e\x13\x92\x28" + "\xb2\xc4\x7a\xba\xd1\xc6\xf6\x1f" + "\xda\x0b\xf1\xd1\x99\x54\xd8\x3b" + "\x16\xf8\xe6\x97\x1e\xa7\xcf\x49" + "\x69\x84\x01\x4c\xdc\x7a\x34\xff" + "\x01\x08\xa3\x0b\x39\xac\x21\x37" + "\xd8\xb4\x04\x19\x8b\x7a\x7d\x17" + "\x44\xd1\x18\xaf\x1f\xa9\x29\xfe" + "\xfa\x77\xe0\x40\x42\x0c\x79\xb7" + "\xc3\x15\x1b\xd9\x0c\x82\xfc\x16" + "\x70\xd6\x2a\xe9\x94\x72\xc5\xa5" + "\x8a\x58\xbc\xfa\xe0\x88\x39\x4a" + "\x80\xe8\xec\xaf\x60\xac\xe7\xf8" + "\x9c\xf0\xfc\x61\x39\x07\x98\x6b" + "\x88\xe3\x98\x22\x28\x18\x4a\x2d" + "\x25\xef\x10\xe3\x83\x66\x3f\xfd" + "\xc7\x0b\xa3\xfd\x97\xa9\xf4\xbd" + "\xd8\x2a\xee\x4a\x50\xad\xcc\xb5" + "\xc7\xab\xb8\x79\x9c\xd1\xf1\x27" + "\x08\xf5\xf5\xe8\x1b\x66\xce\x41" + "\x56\x60\x94\x86\xf0\x78\xc2\xfa" + "\x5b\x63\x40\xb1\xd1\x1a\x38\x69" + "\x0b\x8c\xb2\xf5\xa2\xbe\x90\x9d" + "\x46\x23\x79\x8b\x3b\x4a\xf4\xbb" + "\x55\xf7\x58\x9d\xaf\x59\xff\x74" + "\xf3\xb9\xc4\x26\xb1\xf8\xe1\x28" + "\x8b\x5e\x8f\x6d\x64\xe7\xe8\x63" + "\xd2\x9e\xcb\xee\xae\x19\x04\x1d" + "\x05\xf0\x9d\x99\x7b\x33\x33\xae" + "\x6e\xe5\x09\xdd\x67\x51\xc4\xc8" + "\x6a\xc7\x36\x35\xc9\x93\x76\xa1" + "\xa8\x1c\xfa\x75\x92\x34\x0e\x7d" + "\x3d\x1d\xef\x00\xfd\xa5\x25\x12" + "\x7c\x91\x21\x41\xcc\x50\x47\xa9" + "\x22\x50\x24\x96\x34\x79\x3d\xe8" + "\x3f\xa0\x56\xaf\x98\x53\x55\xc3" + "\x46\x1b\x17\x54\xb8\xb0\xb7\xe0" + "\xe0\xab\x47\x6f\x06\xda\xcc\x75" + "\xa7\x96\xb7\x92\xf3\xa0\x5f\xe6" + "\xba\x97\xe3\x2f\x97\x05\xb2\x99" + "\xa0\x09\x10\x98\x9c\xd3\x2e\xd1" + "\x7e\x2a\x30\x54\x3c\xb9\x33\xe3" + "\xf2\xaf\xd3\xa5\xee\xd0\x0b\x8a" + "\x19\x54\x0f\x02\x51\x1f\x91\xdf" + "\x71\x9c\xad\x77\x35\x28\x55\x6d" + "\xcd\x7a\xd9\xa3\x41\x98\x6b\x37" + "\x19\x0f\xbe\xae\x69\xb2\x25\x01" + "\xee\x0e\x51\x4b\x53\xea\x0f\x5f" + "\x85\x74\x79\x36\x32\x0a\x2a\x40" + "\xad\x6b\x78\x41\x54\x99\xe9\xc1" + "\x2b\x6c\x9b\x42\x21\xef\xe2\x50" + "\x56\x8d\x78\xdf\x58\xbe\x0a\x0f" + "\xfc\xfc\x0d\x2e\xd0\xcb\xa6\x0a" + "\xa8\xd9\x1e\xa9\xd4\x7c\x99\x88" + "\xcf\x11\xad\x1c\xd3\x04\x63\x55" + "\xef\x85\x0b\x69\xa1\x40\xf1\x75" + "\x24\xf4\xe5\x2c\xd4\x7a\x24\x50" + "\x8f\xa2\x71\xc9\x92\x20\xcd\xcf" + "\xda\x40\xbe\xf6\xfe\x1a\xca\xc7" + "\x4a\x80\x45\x55\xcb\xdd\xb7\x01" + "\xb0\x8d\xcb\xd2\xae\xbd\xa4\xd0" + "\x5c\x10\x05\x66\x7b\xd4\xff\xd9" + "\xc4\x23\x9d\x8d\x6b\x24\xf8\x3f" + "\x73\x4d\x5c\x2b\x33\x4c\x5e\x63" + "\x74\x6d\x03\xa1\x7a\x35\x65\x17" + "\x38\x7f\x3b\xc1\x69\xcf\x61\x34" + "\x30\x21\xaf\x97\x47\x12\x3f\xa1" + "\xa7\x50\xc5\x87\xfb\x3f\x70\x32" + "\x86\x17\x5f\x25\xe4\x74\xc6\xd0" + "\x9b\x39\xe6\xe1\x5a\xec\x8f\x40" + "\xce\xcc\x37\x3b\xd8\x72\x1c\x31" + "\x75\xa4\xa6\x89\x8c\xdd\xd6\xd2" + "\x32\x3d\xe8\xc3\x54\xab\x1f\x35" + "\x52\xb4\x94\x81\xb0\x37\x3a\x03" + "\xbb\xb1\x99\x30\xa5\xf8\x21\xcd" + "\x93\x5d\xa7\x13\xed\xc7\x49\x09" + "\x70\xda\x08\x39\xaa\x15\x9e\x45" + "\x35\x2b\x0f\x5c\x8c\x8b\xc9" + "\xa8\xb8\x9f\xfd\x37\x36\x31\x7e" + "\x34\x4f\xc1\xc0\xca\x8a\x22\xfd", + .clen = 735, } }; @@ -14947,6 +15586,282 @@ static const struct aead_testvec sm4_ccm_tv_template[] = { "\x16\x84\x2D\x4F\xA1\x86\xF5\x6A" "\xB3\x32\x56\x97\x1F\xA1\x10\xF4", .clen = 80, + }, { /* Generated from AES-CCM test vectors */ + .key = "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7" + "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf", + .klen = 16, + .iv = "\x01\x00\x00\x00\x03\x02\x01\x00" + "\xa0\xa1\xa2\xa3\xa4\xa5\x00\x00", + .assoc = "\x00\x01\x02\x03\x04\x05\x06\x07", + .alen = 8, + .ptext = "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e", + .plen = 23, + .ctext = "\x7b\xff\x4a\x15\xf5\x73\xce\x82" + "\x6e\xc2\x31\x1d\xe2\x53\x02\xac" + "\xa4\x48\xf9\xe4\xf5\x1f\x81\x70" + "\x18\xbc\xb6\x84\x01\xb8\xae", + .clen = 31, + }, { + .key = "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1" + "\x53\x14\x73\x66\x8d\x88\xf6\x80", + .klen = 16, + .iv = "\x03\xa0\x20\x35\x26\xf2\x21\x8d" + "\x50\x20\xda\xe2\x00\x00\x00\x00", + .assoc = "\x5b\x9e\x13\x67\x02\x5e\xef\xc1" + "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47" + "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d" + "\xd8\x9e\x2b\x56\x10\x23\x56\xe7", + .alen = 32, + .ctext = "\x23\x58\xce\xdc\x40\xb1\xcd\x92" + "\x47\x96\x59\xfc\x8a\x26\x4f\xcf", + .clen = 16, + }, { + .key = "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1" + "\xff\x80\x2e\x48\x7d\x82\xf8\xb9", + .klen = 16, + .iv = "\x03\xaf\x94\x87\x78\x35\x82\x81" + "\x7f\x88\x94\x68\x00\x00\x00\x00", + .alen = 0, + .ptext = "\x00", + .plen = 0, + .ctext = "\x72\x7e\xf5\xd6\x39\x7a\x2b\x43", + .clen = 8, + }, { + .key = "\x39\xbb\xa7\xbe\x59\x97\x9e\x73" + "\xa4\x48\x93\x39\x26\x71\x4a\xc6", + .klen = 16, + .iv = "\x03\xee\x49\x83\xe9\xa9\xff\xe9" + "\x57\xba\xfd\x9e\x00\x00\x00\x00", + .assoc = "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1" + "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64" + "\xa4\xf0\x13\x05\xd1\x77\x99\x67" + "\x11\xc4\xc6\xdb\x00\x56\x36\x61", + .alen = 32, + .ptext = "\x00", + .plen = 0, + .ctext = "\xb0\x9d\xc6\xfb\x7d\xb5\xa1\x0e", + .clen = 8, + }, { + .key = "\x58\x5d\xa0\x96\x65\x1a\x04\xd7" + "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b", + .klen = 16, + .iv = "\x03\xcf\x76\x3f\xd9\x95\x75\x8f" + "\x44\x89\x40\x7b\x00\x00\x00\x00", + .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88" + "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b" + "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b" + "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe", + .alen = 32, + .ptext = "\xc2\x54\xc8\xde\x78\x87\x77\x40" + "\x49\x71\xe4\xb7\xe7\xcb\x76\x61" + "\x0a\x41\xb9\xe9\xc0\x76\x54\xab" + "\x04\x49\x3b\x19\x93\x57\x25\x5d", + .plen = 32, + .ctext = "\xc9\xae\xef\x1d\xf3\x2c\xd3\x38" + "\xc9\x7f\x7e\x28\xe8\xaa\xb3\x60" + "\x49\xdc\x66\xca\x7b\x3d\xe0\x3c" + "\xcb\x45\x9c\x1b\xb2\xbe\x07\x90" + "\x87\xa6\x6b\x89\x0d\x0f\x90\xaa" + "\x7d\xf6\x5a\x9a\x68\x2b\x81\x92", + .clen = 48, + }, { + .key = "\x8b\x32\xcf\xe7\x44\xed\x13\x59" + "\x04\x38\x77\xb0\xb9\xad\xb4\x38", + .klen = 16, + .iv = "\x02\xff\xff\xff\xff\x00\x00\xff" + "\xff\xff\x00\xff\xff\x00\x00\x00", + .assoc = "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88" + "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b" + "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b" + "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe" + "\xc8\xf3\x5c\x52\x10\x63", + .alen = 38, + .ptext = "\x42\xc1\xcc\x08\x48\x6f\x41\x3f" + "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0" + "\x58\x83\xf0\xc3\x70\x14\xc0\x5b" + "\x3f\xec\x1d\x25\x3c\x51\xd2\x03" + "\xcf\x59\x74\x1f\xb2\x85\xb4\x07" + "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb" + "\xaf\x08\x44\xbd\x6f\x91\x15\xe1" + "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50" + "\x59\xa9\x97\xab\xbb\x0e\x74\x5c" + "\x00\xa4\x43\x54\x04\x54\x9b\x3b" + "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08" + "\xae\xe6\x10\x3f\x32\x65\xd1\xfc" + "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3" + "\x35\x23\xf4\x20\x41\xd4\xad\x82" + "\x8b\xa4\xad\x96\x1c\x20\x53\xbe" + "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72" + "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7" + "\xad\x49\x3a\xae\x98\xce\xa6\x66" + "\x10\x30\x90\x8c\x55\x83\xd7\x7c" + "\x8b\xe6\x53\xde\xd2\x6e\x18\x21" + "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73" + "\x57\xcc\x89\x09\x75\x9b\x78\x70" + "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5" + "\xfa\x70\x04\x70\xc6\x96\x1c\x7d" + "\x54\x41\x77\xa8\xe3\xb0\x7e\x96" + "\x82\xd9\xec\xa2\x87\x68\x55\xf9" + "\x8f\x9e\x73\x43\x47\x6a\x08\x36" + "\x93\x67\xa8\x2d\xde\xac\x41\xa9" + "\x5c\x4d\x73\x97\x0f\x70\x68\xfa" + "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9" + "\x78\x1f\x51\x07\xe3\x9a\x13\x4e" + "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7" + "\xab\x19\x37\xd9\xba\x76\x5e\xd2" + "\xf2\x53\x15\x17\x4c\x6b\x16\x9f" + "\x02\x66\x49\xca\x7c\x91\x05\xf2" + "\x45\x36\x1e\xf5\x77\xad\x1f\x46" + "\xa8\x13\xfb\x63\xb6\x08\x99\x63" + "\x82\xa2\xed\xb3\xac\xdf\x43\x19" + "\x45\xea\x78\x73\xd9\xb7\x39\x11" + "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81" + "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79" + "\xa4\x47\x7d\x80\x20\x26\xfd\x63" + "\x0a\xc7\x7e\x6d\x75\x47\xff\x76" + "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b" + "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1" + "\x54\x03\xa4\x09\x0c\x37\x7a\x15" + "\x23\x27\x5b\x8b\x4b\xa5\x64\x97" + "\xae\x4a\x50\x73\x1f\x66\x1c\x5c" + "\x03\x25\x3c\x8d\x48\x58\x71\x34" + "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5" + "\xb6\x19\x2b\x84\x2a\x20\xd1\xea" + "\x80\x6f\x96\x0e\x05\x62\xc7\x78" + "\x87\x79\x60\x38\x46\xb4\x25\x57" + "\x6e\x16\x63\xf8\xad\x6e\xd7\x42" + "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a" + "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22" + "\x86\x5c\x74\x3a\xeb\x24\x26\xc7" + "\x09\xfc\x91\x96\x47\x87\x4f\x1a" + "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24" + "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a" + "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5" + "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb" + "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe" + "\x0b\x63\xde\x87\x42\x79\x8a\x68" + "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f" + "\x9d\xd1\xc7\x45\x90\x08\xc9\x83" + "\xe9\x83\x84\xcb\x28\x69\x09\x69" + "\xce\x99\x46\x00\x54\xcb\xd8\x38" + "\xf9\x53\x4a\xbf\x31\xce\x57\x15" + "\x33\xfa\x96\x04\x33\x42\xe3\xc0" + "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6" + "\x19\x95\xd0\x0e\x82\x07\x63\xf9" + "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9" + "\xb5\x9f\x23\x28\x60\xe7\x20\x51" + "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2" + "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb" + "\x78\xc6\x91\x22\x40\x91\x80\xbe" + "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9" + "\x67\x10\xa4\x83\x98\x79\x23\xe7" + "\x92\xda\xa9\x22\x16\xb1\xe7\x78" + "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37" + "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9" + "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d" + "\x48\x11\x06\xbb\x2d\xf2\x63\x88" + "\x3f\x73\x09\xe2\x45\x56\x31\x51" + "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9" + "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66" + "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23" + "\x59\xfa\xfa\xaa\x44\x04\x01\xa7" + "\xa4\x78\xdb\x74\x3d\x8b\xb5", + .plen = 719, + .ctext = "\xc5\x50\x85\x02\x72\xa8\xb3\x62" + "\xf9\xcd\x77\x7b\x43\xa5\x04\x70" + "\x68\x40\x57\x21\x1c\xfe\xef\x05" + "\x4d\xb8\x44\xba\x59\xea\x62\x32" + "\xcb\x6b\x6a\x39\x9b\xf3\xe5\xa4" + "\x36\x38\xde\x7d\xcf\xb6\xcd\xe3" + "\x89\xbf\x37\xc9\x96\x3c\x70\x10" + "\x92\x47\xcc\xac\x6f\xf8\x55\x9a" + "\x26\x43\x34\xb4\x92\x7d\x68\xfc" + "\x60\x37\x74\x2a\x55\xba\xc7\xd7" + "\x98\x69\xb7\xcf\x42\xfd\xb2\x10" + "\xa0\x59\xe1\x2c\x73\x66\x12\x97" + "\x85\x8b\x28\xcc\x29\x02\x15\x89" + "\x23\xd3\x32\x92\x87\x57\x09\x13" + "\x04\x7e\x8b\x6c\x3a\xc1\x4e\x6c" + "\xe1\x9f\xc8\xcc\x47\x9c\xd8\x10" + "\xf4\xb7\x5c\x30\x7a\x8b\x0f\x01" + "\x52\x38\x02\x92\x99\xac\x03\x90" + "\x18\x32\x2d\x21\x6a\x0a\x2a\xe7" + "\xc2\xcc\x15\x84\x4e\x2b\x0b\x3a" + "\x4c\xdc\xb0\x6b\x10\xd1\x27\x10" + "\xf0\x4a\x5c\x43\xa0\x34\x34\x59" + "\x47\x43\x48\xcb\x69\xa7\xff\x52" + "\xb8\xca\x23\x09\x07\xd7\xc5\xe4" + "\x2a\x4f\x99\xd5\x83\x36\x2a\x2d" + "\x59\xd0\xca\xb0\xfa\x40\x8c\xab" + "\xdf\x69\x08\xd9\x79\x1d\xde\xa8" + "\x0b\x34\x74\x4d\xf5\xa0\x4c\x81" + "\x7f\x93\x06\x40\x24\xfe\x7d\xcd" + "\xe4\xfe\xf8\xf8\x30\xce\xd0\x5d" + "\x70\xfd\x0d\x5a\x78\x85\x74\x2d" + "\xe4\xb5\x40\x18\x99\x11\xe4\x6a" + "\xdf\xfa\x4f\x25\x2c\xde\x15\xb7" + "\x12\xd8\xc6\x90\x0d\x0f\xc9\xfb" + "\x21\xf1\xed\xfe\x98\xe1\x03\xe2" + "\x5c\xef\xb6\xc7\x87\x77\x0e\xcd" + "\xff\x78\x94\xc9\xbe\xd3\x47\xf7" + "\x8d\x37\x48\x01\x42\xe2\x17\x96" + "\xfc\xc0\xcb\x7b\x7b\x57\xaf\x3b" + "\xc9\xd0\x94\xce\x5e\x1b\xa9\x47" + "\x02\x4d\x74\xcc\x45\x1d\xd3\x2d" + "\x5f\x4f\x7f\xf2\x4b\xf9\x59\xee" + "\x9e\x9e\xb9\x95\x29\x19\xd1\x5f" + "\x72\xab\x8d\xf1\x28\xd1\x1c\xae" + "\xc2\xba\xf7\x22\x84\x2c\x83\x51" + "\x03\xad\xa3\xef\x81\xa7\xdc\xf1" + "\x44\x51\x50\x96\x70\xd1\xe5\x47" + "\x57\xf9\x30\x90\xe4\xbf\xfc\x75" + "\x14\xaa\x4d\xb7\xb1\xe7\x79\x33" + "\x43\xc2\x5c\xc1\xbc\x09\x92\x0f" + "\xa7\xaf\x68\x51\x51\xec\x0b\xc3" + "\x3d\x2b\x94\x30\x45\x29\x1b\x9e" + "\x70\x56\xf8\xd6\x67\x2d\x39\x3b" + "\x3c\xd2\xd0\xd3\xdc\x7d\x84\xe9" + "\x06\x31\x98\xa6\x5c\xbf\x10\x58" + "\xce\xbb\xa7\xe1\x65\x7e\x51\x87" + "\x70\x46\xb4\x7f\xf9\xec\x92\x1c" + "\x9b\x24\x49\xc1\x04\xbe\x1c\x5f" + "\xcc\xb3\x33\x8c\xad\xe7\xdc\x32" + "\x54\xa2\x0d\x83\x0f\x3c\x12\x5d" + "\x71\xe3\x9c\xae\x71\xa3\x2a\x10" + "\xc5\x91\xb4\x73\x96\x60\xdb\x5d" + "\x1f\xd5\x9a\xd2\x69\xc3\xd7\x4b" + "\xa2\x66\x81\x96\x4a\xaa\x02\xd6" + "\xd5\x44\x9b\x42\x3a\x15\x5f\xe7" + "\x4d\x7c\xf6\x71\x4a\xea\xe8\x43" + "\xd7\x68\xe4\xbc\x05\x87\x49\x05" + "\x3b\x47\xb2\x6d\x5f\xd1\x11\xa6" + "\x58\xd4\xa2\x45\xec\xb5\x54\x55" + "\xd3\xd6\xd2\x6a\x8b\x21\x9e\x2c" + "\xf1\x27\x4b\x5b\xe3\xff\xe0\xfd" + "\x4b\xf1\xe7\xe2\x84\xf2\x17\x37" + "\x11\x68\xc4\x92\x4b\x6b\xef\x8e" + "\x75\xf5\xc2\x7d\x5c\xe9\x7c\xfc" + "\x2b\x00\x33\x0e\x7d\x69\xd8\xd4" + "\x9b\xa8\x38\x54\x7e\x6d\x23\x51" + "\x2c\xd6\xc4\x58\x23\x1c\x22\x2a" + "\x59\xc5\x9b\xec\x9d\xbf\x03\x0f" + "\xb3\xdd\xba\x02\x22\xa0\x34\x37" + "\x19\x56\xc2\x5b\x32\x1d\x1e\x66" + "\x68\xf4\x47\x05\x04\x18\xa7\x28" + "\x80\xf2\xc7\x99\xed\x1e\x72\x48" + "\x8f\x97\x5d\xb3\x74\x42\xfd\x0c" + "\x0f\x5f\x29\x0c\xf1\x35\x22\x90" + "\xd6\x7c\xb8\xa3\x2a\x89\x38\x71" + "\xe9\x7a\x55\x3c\x3b\xf2\x6e\x1a" + "\x22\x8f\x07\x81\xc1\xe1\xf1\x76" + "\x2a\x75\xab\x86\xc4\xcc\x52\x59" + "\x83\x19\x5e\xb3\x53\xe2\x81\xdf" + "\xe6\x15\xb3\xba\x0c\x0e\xba" + "\xa9\x2c\xed\x51\xd5\x06\xc8\xc6" + "\x4b\x9f\x5d\x1b\x61\x31\xad\xf4", + .clen = 735, } }; @@ -15030,6 +15945,68 @@ static const struct hash_testvec sm4_cmac128_tv_template[] = { } }; +static const struct hash_testvec sm4_xcbc128_tv_template[] = { + { /* Generated from AES-XCBC128 test vectors */ + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = zeroed_string, + .digest = "\xa9\x9a\x5c\x44\xe2\x34\xee\x2c" + "\x9b\xe4\x9d\xca\x64\xb0\xa5\xc4", + .psize = 0, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02", + .digest = "\x17\x27\x62\xf3\x8b\x88\x1d\xc0" + "\x97\x35\x9c\x3e\x9f\x27\xb7\x83", + .psize = 3, + .ksize = 16, + } , { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .digest = "\xda\x45\xd1\xac\xec\x4d\xab\x46" + "\xdd\x59\xe0\x44\xff\x59\xd5\xfc", + .psize = 16, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13", + .digest = "\xbe\x24\x5d\x81\x8c\x8a\x10\xa4" + "\x8e\xc2\x16\xfa\xa4\x83\xc9\x2a", + .psize = 20, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f", + .digest = "\x91\x82\x31\x56\xd5\x77\xa4\xc5" + "\x88\x2d\xce\x3a\x87\x5e\xbd\xba", + .psize = 32, + .ksize = 16, + }, { + .key = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f", + .plaintext = "\x00\x01\x02\x03\x04\x05\x06\x07" + "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" + "\x10\x11\x12\x13\x14\x15\x16\x17" + "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x21", + .digest = "\x2a\xae\xa5\x24\x0c\x12\x9f\x5f" + "\x55\xfb\xae\x35\x13\x0d\x22\x2d", + .psize = 34, + .ksize = 16, + } +}; + /* Cast6 test vectors from RFC 2612 */ static const struct cipher_testvec cast6_tv_template[] = { { diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c index f921f30334f4873b270b3725b320746c8bf9896c..bf4f28742f770a429bd1f992da81785e31e1d4d8 100644 --- a/crypto/twofish_common.c +++ b/crypto/twofish_common.c @@ -25,9 +25,9 @@ * Third Edition. */ +#include #include #include -#include #include #include #include diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c index 86b2f067a4162acad710c0c9fe9125e8c7aa8ef0..557915e4062d7ca55eb62859cca742a5d46d0bfb 100644 --- a/crypto/twofish_generic.c +++ b/crypto/twofish_generic.c @@ -25,12 +25,12 @@ */ #include +#include #include #include #include #include #include -#include #include /* Macros to compute the g() function in the encryption and decryption diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c index e8ad41387f84c19452a306b9bba2c1e147d273dd..b082eb942a0f86f3ac37c667016f0e3e65ca83d2 100644 --- a/drivers/acpi/acpica/dswexec.c +++ b/drivers/acpi/acpica/dswexec.c @@ -389,9 +389,11 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state) /* * All opcodes require operand resolution, with the only exceptions - * being the object_type and size_of operators. + * being the object_type and size_of operators as well as opcodes that + * take no arguments. */ - if (!(walk_state->op_info->flags & AML_NO_OPERAND_RESOLVE)) { + if (!(walk_state->op_info->flags & AML_NO_OPERAND_RESOLVE) && + (walk_state->op_info->flags & AML_HAS_ARGS)) { /* Resolve all operands */ diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c index 0a8372bf6a77d199ae1b5955b5c2e52f19065527..a5c19f46ec17dfe87775bc6562072778a1380c9f 100644 --- a/drivers/acpi/acpica/evxfregn.c +++ b/drivers/acpi/acpica/evxfregn.c @@ -20,13 +20,14 @@ ACPI_MODULE_NAME("evxfregn") /******************************************************************************* * - * FUNCTION: acpi_install_address_space_handler + * FUNCTION: acpi_install_address_space_handler_internal * * PARAMETERS: device - Handle for the device * space_id - The address space ID * handler - Address of the handler * setup - Address of the setup function * context - Value passed to the handler on each access + * Run_reg - Run _REG methods for this address space? * * RETURN: Status * @@ -37,13 +38,16 @@ ACPI_MODULE_NAME("evxfregn") * are executed here, and these methods can only be safely executed after * the default handlers have been installed and the hardware has been * initialized (via acpi_enable_subsystem.) + * To avoid this problem pass FALSE for Run_Reg and later on call + * acpi_execute_reg_methods() to execute _REG. * ******************************************************************************/ -acpi_status -acpi_install_address_space_handler(acpi_handle device, - acpi_adr_space_type space_id, - acpi_adr_space_handler handler, - acpi_adr_space_setup setup, void *context) +static acpi_status +acpi_install_address_space_handler_internal(acpi_handle device, + acpi_adr_space_type space_id, + acpi_adr_space_handler handler, + acpi_adr_space_setup setup, + void *context, u8 run_reg) { struct acpi_namespace_node *node; acpi_status status; @@ -80,14 +84,40 @@ acpi_install_address_space_handler(acpi_handle device, /* Run all _REG methods for this address space */ - acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + if (run_reg) { + acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + } unlock_and_exit: (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); return_ACPI_STATUS(status); } +acpi_status +acpi_install_address_space_handler(acpi_handle device, + acpi_adr_space_type space_id, + acpi_adr_space_handler handler, + acpi_adr_space_setup setup, void *context) +{ + return acpi_install_address_space_handler_internal(device, space_id, + handler, setup, + context, TRUE); +} + ACPI_EXPORT_SYMBOL(acpi_install_address_space_handler) +acpi_status +acpi_install_address_space_handler_no_reg(acpi_handle device, + acpi_adr_space_type space_id, + acpi_adr_space_handler handler, + acpi_adr_space_setup setup, + void *context) +{ + return acpi_install_address_space_handler_internal(device, space_id, + handler, setup, + context, FALSE); +} + +ACPI_EXPORT_SYMBOL(acpi_install_address_space_handler_no_reg) /******************************************************************************* * @@ -228,3 +258,51 @@ unlock_and_exit: } ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler) +/******************************************************************************* + * + * FUNCTION: acpi_execute_reg_methods + * + * PARAMETERS: device - Handle for the device + * space_id - The address space ID + * + * RETURN: Status + * + * DESCRIPTION: Execute _REG for all op_regions of a given space_id. + * + ******************************************************************************/ +acpi_status +acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id) +{ + struct acpi_namespace_node *node; + acpi_status status; + + ACPI_FUNCTION_TRACE(acpi_execute_reg_methods); + + /* Parameter validation */ + + if (!device) { + return_ACPI_STATUS(AE_BAD_PARAMETER); + } + + status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE); + if (ACPI_FAILURE(status)) { + return_ACPI_STATUS(status); + } + + /* Convert and validate the device handle */ + + node = acpi_ns_validate_handle(device); + if (node) { + + /* Run all _REG methods for this address space */ + + acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT); + } else { + status = AE_BAD_PARAMETER; + } + + (void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE); + return_ACPI_STATUS(status); +} + +ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods) diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 2520fb998ce60ae30e7092dbde9121a019212a1e..105d2e795afadd16afb1cdc46bd3f9150faf93bc 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -94,6 +94,7 @@ enum { EC_FLAGS_QUERY_ENABLED, /* Query is enabled */ EC_FLAGS_EVENT_HANDLER_INSTALLED, /* Event handler installed */ EC_FLAGS_EC_HANDLER_INSTALLED, /* OpReg handler installed */ + EC_FLAGS_EC_REG_CALLED, /* OpReg ACPI _REG method called */ EC_FLAGS_QUERY_METHODS_INSTALLED, /* _Qxx handlers installed */ EC_FLAGS_STARTED, /* Driver is started */ EC_FLAGS_STOPPED, /* Driver is stopped */ @@ -1446,6 +1447,7 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) * ec_install_handlers - Install service callbacks and register query methods. * @ec: Target EC. * @device: ACPI device object corresponding to @ec. + * @call_reg: If _REG should be called to notify OpRegion availability * * Install a handler for the EC address space type unless it has been installed * already. If @device is not NULL, also look for EC query methods in the @@ -1458,7 +1460,8 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec) * -EPROBE_DEFER if GPIO IRQ acquisition needs to be deferred, * or 0 (success) otherwise. */ -static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) +static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device, + bool call_reg) { acpi_status status; @@ -1466,15 +1469,21 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { acpi_ec_enter_noirq(ec); - status = acpi_install_address_space_handler(ec->handle, - ACPI_ADR_SPACE_EC, - &acpi_ec_space_handler, - NULL, ec); + status = acpi_install_address_space_handler_no_reg(ec->handle, + ACPI_ADR_SPACE_EC, + &acpi_ec_space_handler, + NULL, ec); if (ACPI_FAILURE(status)) { acpi_ec_stop(ec, false); return -ENODEV; } set_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags); + ec->address_space_handler_holder = ec->handle; + } + + if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) { + acpi_execute_reg_methods(ec->handle, ACPI_ADR_SPACE_EC); + set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags); } if (!device) @@ -1526,7 +1535,8 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device) static void ec_remove_handlers(struct acpi_ec *ec) { if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) { - if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle, + if (ACPI_FAILURE(acpi_remove_address_space_handler( + ec->address_space_handler_holder, ACPI_ADR_SPACE_EC, &acpi_ec_space_handler))) pr_err("failed to remove space handler\n"); clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags); @@ -1562,11 +1572,11 @@ static void ec_remove_handlers(struct acpi_ec *ec) } } -static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device) +static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool call_reg) { int ret; - ret = ec_install_handlers(ec, device); + ret = ec_install_handlers(ec, device, call_reg); if (ret) return ret; @@ -1631,7 +1641,7 @@ static int acpi_ec_add(struct acpi_device *device) } } - ret = acpi_ec_setup(ec, device); + ret = acpi_ec_setup(ec, device, true); if (ret) goto err; @@ -1750,7 +1760,7 @@ void __init acpi_ec_dsdt_probe(void) * At this point, the GPE is not fully initialized, so do not to * handle the events. */ - ret = acpi_ec_setup(ec, NULL); + ret = acpi_ec_setup(ec, NULL, true); if (ret) { acpi_ec_free(ec); return; @@ -1944,7 +1954,7 @@ void __init acpi_ec_ecdt_probe(void) * At this point, the namespace is not initialized, so do not find * the namespace objects, or handle the events. */ - ret = acpi_ec_setup(ec, NULL); + ret = acpi_ec_setup(ec, NULL, false); if (ret) { acpi_ec_free(ec); goto out; diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 219c02df9a08c720dcca32559f47d3770b0507c2..ec584442fb298861d75b51adafe9732f5ec09290 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -173,6 +173,7 @@ enum acpi_ec_event_state { struct acpi_ec { acpi_handle handle; + acpi_handle address_space_handler_holder; int gpe; int irq; unsigned long command_addr; diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 7c3590fd97c28d8977f2be0c6789264ff619c07a..017c4cdb219eb115e0bc5ce93269c24ff2ff4a70 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -821,7 +821,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name, * called by a driver when serving an unrelated request from userland, we use * the kernel credentials to read the file. */ - kern_cred = prepare_kernel_cred(NULL); + kern_cred = prepare_kernel_cred(&init_task); if (!kern_cred) { ret = -ENOMEM; goto out; diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c index 2b2095542816b84fcfbb0333a3efa80d7fd9ea8b..55397ba765d234ec43019d8d412e32f50c2ec7b8 100644 --- a/drivers/char/agp/amd-k7-agp.c +++ b/drivers/char/agp/amd-k7-agp.c @@ -488,26 +488,11 @@ static void agp_amdk7_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM - -static int agp_amdk7_suspend(struct pci_dev *pdev, pm_message_t state) +static int agp_amdk7_resume(struct device *dev) { - pci_save_state(pdev); - pci_set_power_state(pdev, pci_choose_state(pdev, state)); - - return 0; -} - -static int agp_amdk7_resume(struct pci_dev *pdev) -{ - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - return amd_irongate_driver.configure(); } -#endif /* CONFIG_PM */ - /* must be the same order as name table above */ static const struct pci_device_id agp_amdk7_pci_table[] = { { @@ -539,15 +524,14 @@ static const struct pci_device_id agp_amdk7_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_amdk7_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_amdk7_pm_ops, NULL, agp_amdk7_resume); + static struct pci_driver agp_amdk7_pci_driver = { .name = "agpgart-amdk7", .id_table = agp_amdk7_pci_table, .probe = agp_amdk7_probe, .remove = agp_amdk7_remove, -#ifdef CONFIG_PM - .suspend = agp_amdk7_suspend, - .resume = agp_amdk7_resume, -#endif + .driver.pm = &agp_amdk7_pm_ops, }; static int __init agp_amdk7_init(void) diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 84a4aa9312cf8d845990f7b06ebc328b41064c7f..ce8651436609fcefa2259cf7ae1704bc1e933b0c 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -588,9 +588,7 @@ static void agp_amd64_remove(struct pci_dev *pdev) agp_bridges_found--; } -#define agp_amd64_suspend NULL - -static int __maybe_unused agp_amd64_resume(struct device *dev) +static int agp_amd64_resume(struct device *dev) { struct pci_dev *pdev = to_pci_dev(dev); @@ -727,7 +725,7 @@ static const struct pci_device_id agp_amd64_pci_promisc_table[] = { { } }; -static SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, agp_amd64_suspend, agp_amd64_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume); static struct pci_driver agp_amd64_pci_driver = { .name = "agpgart-amd64", diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c index 6f5530482d8332496fc48757f2d2a45ed26f96ea..3c1fce48aabe764d91b1381559d7a5c916c4fb53 100644 --- a/drivers/char/agp/ati-agp.c +++ b/drivers/char/agp/ati-agp.c @@ -238,23 +238,10 @@ static int ati_configure(void) } -#ifdef CONFIG_PM -static int agp_ati_suspend(struct pci_dev *dev, pm_message_t state) +static int agp_ati_resume(struct device *dev) { - pci_save_state(dev); - pci_set_power_state(dev, PCI_D3hot); - - return 0; -} - -static int agp_ati_resume(struct pci_dev *dev) -{ - pci_set_power_state(dev, PCI_D0); - pci_restore_state(dev); - return ati_configure(); } -#endif /* *Since we don't need contiguous memory we just try @@ -559,15 +546,14 @@ static const struct pci_device_id agp_ati_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_ati_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_ati_pm_ops, NULL, agp_ati_resume); + static struct pci_driver agp_ati_pci_driver = { .name = "agpgart-ati", .id_table = agp_ati_pci_table, .probe = agp_ati_probe, .remove = agp_ati_remove, -#ifdef CONFIG_PM - .suspend = agp_ati_suspend, - .resume = agp_ati_resume, -#endif + .driver.pm = &agp_ati_pm_ops, }; static int __init agp_ati_init(void) diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c index c53f0f9ef5b0b2db8bcd835163ef30e0264b6c37..f28d42319269579a0e05110486475104816fae16 100644 --- a/drivers/char/agp/efficeon-agp.c +++ b/drivers/char/agp/efficeon-agp.c @@ -412,18 +412,11 @@ static void agp_efficeon_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM -static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state) -{ - return 0; -} - -static int agp_efficeon_resume(struct pci_dev *pdev) +static int agp_efficeon_resume(struct device *dev) { printk(KERN_DEBUG PFX "agp_efficeon_resume()\n"); return efficeon_configure(); } -#endif static const struct pci_device_id agp_efficeon_pci_table[] = { { @@ -437,6 +430,8 @@ static const struct pci_device_id agp_efficeon_pci_table[] = { { } }; +static DEFINE_SIMPLE_DEV_PM_OPS(agp_efficeon_pm_ops, NULL, agp_efficeon_resume); + MODULE_DEVICE_TABLE(pci, agp_efficeon_pci_table); static struct pci_driver agp_efficeon_pci_driver = { @@ -444,10 +439,7 @@ static struct pci_driver agp_efficeon_pci_driver = { .id_table = agp_efficeon_pci_table, .probe = agp_efficeon_probe, .remove = agp_efficeon_remove, -#ifdef CONFIG_PM - .suspend = agp_efficeon_suspend, - .resume = agp_efficeon_resume, -#endif + .driver.pm = &agp_efficeon_pm_ops, }; static int __init agp_efficeon_init(void) diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c index 9e4f27a6cb5afad0034f1636a12a2b775cb36663..c518b3a9db04d6eca1c529d6790f0b9116965fb7 100644 --- a/drivers/char/agp/intel-agp.c +++ b/drivers/char/agp/intel-agp.c @@ -817,16 +817,15 @@ static void agp_intel_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM -static int agp_intel_resume(struct pci_dev *pdev) +static int agp_intel_resume(struct device *dev) { + struct pci_dev *pdev = to_pci_dev(dev); struct agp_bridge_data *bridge = pci_get_drvdata(pdev); bridge->driver->configure(); return 0; } -#endif static const struct pci_device_id agp_intel_pci_table[] = { #define ID(x) \ @@ -895,14 +894,14 @@ static const struct pci_device_id agp_intel_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_intel_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_intel_pm_ops, NULL, agp_intel_resume); + static struct pci_driver agp_intel_pci_driver = { .name = "agpgart-intel", .id_table = agp_intel_pci_table, .probe = agp_intel_probe, .remove = agp_intel_remove, -#ifdef CONFIG_PM - .resume = agp_intel_resume, -#endif + .driver.pm = &agp_intel_pm_ops, }; static int __init agp_intel_init(void) diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c index 826dbd06f6bbb1eaac6c8fff2a770fb5b577d5cd..dbcbc06cc20293b71ea9088b1297178347233ce1 100644 --- a/drivers/char/agp/nvidia-agp.c +++ b/drivers/char/agp/nvidia-agp.c @@ -404,28 +404,13 @@ static void agp_nvidia_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#ifdef CONFIG_PM -static int agp_nvidia_suspend(struct pci_dev *pdev, pm_message_t state) +static int agp_nvidia_resume(struct device *dev) { - pci_save_state(pdev); - pci_set_power_state(pdev, PCI_D3hot); - - return 0; -} - -static int agp_nvidia_resume(struct pci_dev *pdev) -{ - /* set power state 0 and restore PCI space */ - pci_set_power_state(pdev, PCI_D0); - pci_restore_state(pdev); - /* reconfigure AGP hardware again */ nvidia_configure(); return 0; } -#endif - static const struct pci_device_id agp_nvidia_pci_table[] = { { @@ -449,15 +434,14 @@ static const struct pci_device_id agp_nvidia_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_nvidia_pci_table); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_nvidia_pm_ops, NULL, agp_nvidia_resume); + static struct pci_driver agp_nvidia_pci_driver = { .name = "agpgart-nvidia", .id_table = agp_nvidia_pci_table, .probe = agp_nvidia_probe, .remove = agp_nvidia_remove, -#ifdef CONFIG_PM - .suspend = agp_nvidia_suspend, - .resume = agp_nvidia_resume, -#endif + .driver.pm = &agp_nvidia_pm_ops, }; static int __init agp_nvidia_init(void) diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c index f8a02f4bef1bf3b97c700136f0c8a58e927cdd5d..484bb101c53b524f8540186251c43edda009ec77 100644 --- a/drivers/char/agp/sis-agp.c +++ b/drivers/char/agp/sis-agp.c @@ -217,10 +217,7 @@ static void agp_sis_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#define agp_sis_suspend NULL - -static int __maybe_unused agp_sis_resume( - __attribute__((unused)) struct device *dev) +static int agp_sis_resume(__attribute__((unused)) struct device *dev) { return sis_driver.configure(); } @@ -407,7 +404,7 @@ static const struct pci_device_id agp_sis_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_sis_pci_table); -static SIMPLE_DEV_PM_OPS(agp_sis_pm_ops, agp_sis_suspend, agp_sis_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_sis_pm_ops, NULL, agp_sis_resume); static struct pci_driver agp_sis_pci_driver = { .name = "agpgart-sis", diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c index b2f484f527fb786bd54908849f141f1d9ea9b53f..bc5140af2dcbe752532e1d8360d279308a18c069 100644 --- a/drivers/char/agp/via-agp.c +++ b/drivers/char/agp/via-agp.c @@ -489,9 +489,7 @@ static void agp_via_remove(struct pci_dev *pdev) agp_put_bridge(bridge); } -#define agp_via_suspend NULL - -static int __maybe_unused agp_via_resume(struct device *dev) +static int agp_via_resume(struct device *dev) { struct agp_bridge_data *bridge = dev_get_drvdata(dev); @@ -551,7 +549,7 @@ static const struct pci_device_id agp_via_pci_table[] = { MODULE_DEVICE_TABLE(pci, agp_via_pci_table); -static SIMPLE_DEV_PM_OPS(agp_via_pm_ops, agp_via_suspend, agp_via_resume); +static DEFINE_SIMPLE_DEV_PM_OPS(agp_via_pm_ops, NULL, agp_via_resume); static struct pci_driver agp_via_pci_driver = { .name = "agpgart-via", diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c index c22d4184bb612a61c83762e0b0feaf21f118fe15..0555e3838bce1fbead6536a0f8a841aecbec5207 100644 --- a/drivers/char/hw_random/amd-rng.c +++ b/drivers/char/hw_random/amd-rng.c @@ -143,15 +143,19 @@ static int __init amd_rng_mod_init(void) found: err = pci_read_config_dword(pdev, 0x58, &pmbase); if (err) - return err; + goto put_dev; pmbase &= 0x0000FF00; - if (pmbase == 0) - return -EIO; + if (pmbase == 0) { + err = -EIO; + goto put_dev; + } priv = kzalloc(sizeof(*priv), GFP_KERNEL); - if (!priv) - return -ENOMEM; + if (!priv) { + err = -ENOMEM; + goto put_dev; + } if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) { dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n", @@ -185,6 +189,8 @@ err_iomap: release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE); out: kfree(priv); +put_dev: + pci_dev_put(pdev); return err; } @@ -200,6 +206,8 @@ static void __exit amd_rng_mod_exit(void) release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE); + pci_dev_put(priv->pcidev); + kfree(priv); } diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c index 7c55f4cf4a8ba69b27185fc27432c363e6d8acc9..c99c54cd99c67675bd6b679dbd57986feaeb40a7 100644 --- a/drivers/char/hw_random/cavium-rng-vf.c +++ b/drivers/char/hw_random/cavium-rng-vf.c @@ -225,7 +225,6 @@ static int cavium_rng_probe_vf(struct pci_dev *pdev, return -ENOMEM; rng->ops.read = cavium_rng_read; - rng->ops.quality = 1000; pci_set_drvdata(pdev, rng); diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c index a01e9307737c5e11eea9ccd9dd5c10fe724ac04b..c1193f85982c36cae1cd1bf57db09f9ceb99b9a6 100644 --- a/drivers/char/hw_random/cn10k-rng.c +++ b/drivers/char/hw_random/cn10k-rng.c @@ -145,7 +145,6 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; rng->ops.read = cn10k_rng_read; - rng->ops.quality = 1000; rng->ops.priv = (unsigned long)rng; reset_rng_health_state(rng); diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 63a0a8e4505d2b232714b92af6c71d3d23fc9cea..f34d356fe2c06104c8a9cde73f0b0aa88a4d6671 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -41,14 +41,14 @@ static DEFINE_MUTEX(reading_mutex); static int data_avail; static u8 *rng_buffer, *rng_fillbuf; static unsigned short current_quality; -static unsigned short default_quality; /* = 0; default to "off" */ +static unsigned short default_quality = 1024; /* default to maximum */ module_param(current_quality, ushort, 0644); MODULE_PARM_DESC(current_quality, "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead"); module_param(default_quality, ushort, 0644); MODULE_PARM_DESC(default_quality, - "default entropy content of hwrng per 1024 bits of input"); + "default maximum entropy content of hwrng per 1024 bits of input"); static void drop_current_rng(void); static int hwrng_init(struct hwrng *rng); @@ -172,10 +172,7 @@ static int hwrng_init(struct hwrng *rng) reinit_completion(&rng->cleanup_done); skip_init: - if (!rng->quality) - rng->quality = default_quality; - if (rng->quality > 1024) - rng->quality = 1024; + rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024); current_quality = rng->quality; /* obsolete */ return 0; diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c index 138ce434f86b20b7552ec962cb8267115b97c059..12fbe809183190209c371a3983b1270e2f3871f1 100644 --- a/drivers/char/hw_random/geode-rng.c +++ b/drivers/char/hw_random/geode-rng.c @@ -51,6 +51,10 @@ static const struct pci_device_id pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, pci_tbl); +struct amd_geode_priv { + struct pci_dev *pcidev; + void __iomem *membase; +}; static int geode_rng_data_read(struct hwrng *rng, u32 *data) { @@ -90,6 +94,7 @@ static int __init geode_rng_init(void) const struct pci_device_id *ent; void __iomem *mem; unsigned long rng_base; + struct amd_geode_priv *priv; for_each_pci_dev(pdev) { ent = pci_match_id(pci_tbl, pdev); @@ -97,17 +102,26 @@ static int __init geode_rng_init(void) goto found; } /* Device not found. */ - goto out; + return err; found: + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) { + err = -ENOMEM; + goto put_dev; + } + rng_base = pci_resource_start(pdev, 0); if (rng_base == 0) - goto out; + goto free_priv; err = -ENOMEM; mem = ioremap(rng_base, 0x58); if (!mem) - goto out; - geode_rng.priv = (unsigned long)mem; + goto free_priv; + + geode_rng.priv = (unsigned long)priv; + priv->membase = mem; + priv->pcidev = pdev; pr_info("AMD Geode RNG detected\n"); err = hwrng_register(&geode_rng); @@ -116,20 +130,26 @@ found: err); goto err_unmap; } -out: return err; err_unmap: iounmap(mem); - goto out; +free_priv: + kfree(priv); +put_dev: + pci_dev_put(pdev); + return err; } static void __exit geode_rng_exit(void) { - void __iomem *mem = (void __iomem *)geode_rng.priv; + struct amd_geode_priv *priv; + priv = (struct amd_geode_priv *)geode_rng.priv; hwrng_unregister(&geode_rng); - iounmap(mem); + iounmap(priv->membase); + pci_dev_put(priv->pcidev); + kfree(priv); } module_init(geode_rng_init); diff --git a/drivers/char/hw_random/mpfs-rng.c b/drivers/char/hw_random/mpfs-rng.c index 5813da617a485e9afb5b4840a691b34e23b4bd16..c6972734ae62e8e31b734b10beaa6ceed13af326 100644 --- a/drivers/char/hw_random/mpfs-rng.c +++ b/drivers/char/hw_random/mpfs-rng.c @@ -78,7 +78,6 @@ static int mpfs_rng_probe(struct platform_device *pdev) rng_priv->rng.read = mpfs_rng_read; rng_priv->rng.name = pdev->name; - rng_priv->rng.quality = 1024; platform_set_drvdata(pdev, rng_priv); diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c index 6c00ea0085553a60c246837f626e3a69c5ae01cc..aa993753ab120bcfaff7b14fbdd36524e7a6fc0d 100644 --- a/drivers/char/hw_random/mtk-rng.c +++ b/drivers/char/hw_random/mtk-rng.c @@ -22,7 +22,7 @@ #define RNG_AUTOSUSPEND_TIMEOUT 100 #define USEC_POLL 2 -#define TIMEOUT_POLL 20 +#define TIMEOUT_POLL 60 #define RNG_CTRL 0x00 #define RNG_EN BIT(0) @@ -77,7 +77,7 @@ static bool mtk_rng_wait_ready(struct hwrng *rng, bool wait) readl_poll_timeout_atomic(priv->base + RNG_CTRL, ready, ready & RNG_READY, USEC_POLL, TIMEOUT_POLL); - return !!ready; + return !!(ready & RNG_READY); } static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) @@ -179,6 +179,7 @@ static const struct dev_pm_ops mtk_rng_pm_ops = { #endif /* CONFIG_PM */ static const struct of_device_id mtk_rng_match[] = { + { .compatible = "mediatek,mt7986-rng" }, { .compatible = "mediatek,mt7623-rng" }, {}, }; diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c index 1ec5f267a6566d10ec536cecd367e671ae5a78c0..9903d0357e06ea425e915b0b66a7e2820dd3a1f2 100644 --- a/drivers/char/hw_random/npcm-rng.c +++ b/drivers/char/hw_random/npcm-rng.c @@ -13,11 +13,13 @@ #include #include #include +#include #define NPCM_RNGCS_REG 0x00 /* Control and status register */ #define NPCM_RNGD_REG 0x04 /* Data register */ #define NPCM_RNGMODE_REG 0x08 /* Mode register */ +#define NPCM_RNG_CLK_SET_62_5MHZ BIT(2) /* 60-80 MHz */ #define NPCM_RNG_CLK_SET_25MHZ GENMASK(4, 3) /* 20-25 MHz */ #define NPCM_RNG_DATA_VALID BIT(1) #define NPCM_RNG_ENABLE BIT(0) @@ -31,14 +33,14 @@ struct npcm_rng { void __iomem *base; struct hwrng rng; + u32 clkp; }; static int npcm_rng_init(struct hwrng *rng) { struct npcm_rng *priv = to_npcm_rng(rng); - writel(NPCM_RNG_CLK_SET_25MHZ | NPCM_RNG_ENABLE, - priv->base + NPCM_RNGCS_REG); + writel(priv->clkp | NPCM_RNG_ENABLE, priv->base + NPCM_RNGCS_REG); return 0; } @@ -47,7 +49,7 @@ static void npcm_rng_cleanup(struct hwrng *rng) { struct npcm_rng *priv = to_npcm_rng(rng); - writel(NPCM_RNG_CLK_SET_25MHZ, priv->base + NPCM_RNGCS_REG); + writel(priv->clkp, priv->base + NPCM_RNGCS_REG); } static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait) @@ -109,7 +111,7 @@ static int npcm_rng_probe(struct platform_device *pdev) priv->rng.name = pdev->name; priv->rng.read = npcm_rng_read; priv->rng.priv = (unsigned long)&pdev->dev; - priv->rng.quality = 1000; + priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev); writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG); @@ -162,7 +164,10 @@ static const struct dev_pm_ops npcm_rng_pm_ops = { }; static const struct of_device_id rng_dt_id[] __maybe_unused = { - { .compatible = "nuvoton,npcm750-rng", }, + { .compatible = "nuvoton,npcm750-rng", + .data = (void *)NPCM_RNG_CLK_SET_25MHZ }, + { .compatible = "nuvoton,npcm845-rng", + .data = (void *)NPCM_RNG_CLK_SET_62_5MHZ }, {}, }; MODULE_DEVICE_TABLE(of, rng_dt_id); diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c index 795853dfc46b70a0f38b4a803780971b8900fb6a..cffa326ddc8d3b1738621c0bd8a24d08f1458016 100644 --- a/drivers/char/hw_random/s390-trng.c +++ b/drivers/char/hw_random/s390-trng.c @@ -191,7 +191,6 @@ static struct hwrng trng_hwrng_dev = { .name = "s390-trng", .data_read = trng_hwrng_data_read, .read = trng_hwrng_read, - .quality = 1024, }; diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c index bc22178f83e83ac80cd50677a63da2ef07995136..a6731cf0627ac06c7202ce1d2bca604cfae1ca07 100644 --- a/drivers/char/hw_random/stm32-rng.c +++ b/drivers/char/hw_random/stm32-rng.c @@ -44,16 +44,18 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait) pm_runtime_get_sync((struct device *) priv->rng.priv); - while (max > sizeof(u32)) { + while (max >= sizeof(u32)) { sr = readl_relaxed(priv->base + RNG_SR); /* Manage timeout which is based on timer and take */ /* care of initial delay time when enabling rng */ if (!sr && wait) { - retval = readl_relaxed_poll_timeout_atomic(priv->base + int err; + + err = readl_relaxed_poll_timeout_atomic(priv->base + RNG_SR, sr, sr, 10, 50000); - if (retval) + if (err) dev_err((struct device *)priv->rng.priv, "%s: timeout %x!\n", __func__, sr); } diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index 8ea1fc831eb7bac773848fc39ea9f63742217ba3..26f322d19a883f9672473c2c86aa733432ca8113 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -145,8 +145,6 @@ static int timeriomem_rng_probe(struct platform_device *pdev) if (!of_property_read_u32(pdev->dev.of_node, "quality", &i)) priv->rng_ops.quality = i; - else - priv->rng_ops.quality = 0; } else { period = pdata->period; priv->rng_ops.quality = pdata->quality; diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c index a6f3a8a2aca6d1c9091a21da56d3efea38ba74e9..f7690e0f92ede2f1ed29afc7de5943443c4b0988 100644 --- a/drivers/char/hw_random/virtio-rng.c +++ b/drivers/char/hw_random/virtio-rng.c @@ -148,7 +148,6 @@ static int probe_common(struct virtio_device *vdev) .cleanup = virtio_cleanup, .priv = (unsigned long)vi, .name = vi->name, - .quality = 1000, }; vdev->priv = vi; diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 55e75fbb658ee1b5fd46a10fef0dde1e6dd3125d..dfb103f81a64b35f4f9e4a8034ab75169dcc18d6 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -669,7 +669,12 @@ config CRYPTO_DEV_IMGTEC_HASH config CRYPTO_DEV_ROCKCHIP tristate "Rockchip's Cryptographic Engine driver" depends on OF && ARCH_ROCKCHIP + depends on PM + select CRYPTO_ECB + select CRYPTO_CBC + select CRYPTO_DES select CRYPTO_AES + select CRYPTO_ENGINE select CRYPTO_LIB_DES select CRYPTO_MD5 select CRYPTO_SHA1 @@ -681,6 +686,16 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +config CRYPTO_DEV_ROCKCHIP_DEBUG + bool "Enable Rockchip crypto stats" + depends on CRYPTO_DEV_ROCKCHIP + depends on DEBUG_FS + help + Say y to enable Rockchip crypto debug stats. + This will create /sys/kernel/debug/rk3288_crypto/stats for displaying + the number of requests per algorithm and other internal stats. + + config CRYPTO_DEV_ZYNQMP_AES tristate "Support for Xilinx ZynqMP AES hw accelerator" depends on ZYNQMP_FIRMWARE || COMPILE_TEST @@ -785,8 +800,8 @@ config CRYPTO_DEV_CCREE select CRYPTO_ECB select CRYPTO_CTR select CRYPTO_XTS - select CRYPTO_SM4 - select CRYPTO_SM3 + select CRYPTO_SM4_GENERIC + select CRYPTO_SM3_GENERIC help Say 'Y' to enable a driver for the REE interface of the Arm TrustZone CryptoCell family of processors. Currently the diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 116de173a66c595cfb536af3c83a3f8b492146da..fa8bf1be1a8cded5ed69fd66630f9af25abf1fe6 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -41,7 +41,7 @@ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/ -obj-$(CONFIG_ARCH_STM32) += stm32/ +obj-y += stm32/ obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/ diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c index c4b0a8b588429b7f62fdfb37d2710c75ace7d9e6..e2b9b9104694172fa264e275eae21de8b69b0ee3 100644 --- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c +++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c @@ -108,7 +108,6 @@ int sun8i_ce_hwrng_register(struct sun8i_ce_dev *ce) } ce->trng.name = "sun8i Crypto Engine TRNG"; ce->trng.read = sun8i_ce_trng_read; - ce->trng.quality = 1000; ret = hwrng_register(&ce->trng); if (ret) diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c index 910d6751644cf673ea218cb346d2cf6e7ba6ac8a..902f6be057ec6cd7db03b433d2301dae542aa2ab 100644 --- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c +++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c @@ -124,7 +124,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq) unsigned int ivsize = crypto_skcipher_ivsize(tfm); struct sun8i_ss_flow *sf = &ss->flows[rctx->flow]; int i = 0; - u32 a; + dma_addr_t a; int err; rctx->ivlen = ivsize; diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c index 6e7ae896717cd2a21600f4282fc27145c3e18da3..937187027ad574acd8eeaf62f34680e587b60e02 100644 --- a/drivers/crypto/amlogic/amlogic-gxl-core.c +++ b/drivers/crypto/amlogic/amlogic-gxl-core.c @@ -237,7 +237,6 @@ static int meson_crypto_probe(struct platform_device *pdev) return err; } - mc->irqs = devm_kcalloc(mc->dev, MAXFLOW, sizeof(int), GFP_KERNEL); for (i = 0; i < MAXFLOW; i++) { mc->irqs[i] = platform_get_irq(pdev, i); if (mc->irqs[i] < 0) diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h index dc0f142324a3c042c5076fc438c900b1221dbac8..8c0746a1d6d43c30e34bb2718e6eb0e938247d86 100644 --- a/drivers/crypto/amlogic/amlogic-gxl.h +++ b/drivers/crypto/amlogic/amlogic-gxl.h @@ -95,7 +95,7 @@ struct meson_dev { struct device *dev; struct meson_flow *chanlist; atomic_t flow; - int *irqs; + int irqs[MAXFLOW]; #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG struct dentry *dbgfs_dir; #endif diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c index 82bf15d4956144710d95124602d0a574d396b9f3..53100fb9b07bdb9fe360916f93d76c46c46cb4cc 100644 --- a/drivers/crypto/atmel-ecc.c +++ b/drivers/crypto/atmel-ecc.c @@ -311,9 +311,9 @@ static struct kpp_alg atmel_ecdh_nist_p256 = { }, }; -static int atmel_ecc_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int atmel_ecc_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct atmel_i2c_client_priv *i2c_priv; int ret; @@ -390,7 +390,7 @@ static struct i2c_driver atmel_ecc_driver = { .name = "atmel-ecc", .of_match_table = of_match_ptr(atmel_ecc_dt_ids), }, - .probe = atmel_ecc_probe, + .probe_new = atmel_ecc_probe, .remove = atmel_ecc_remove, .id_table = atmel_ecc_id, }; diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c index a84b657598c6e854053c83141634484af0e1220a..272a06f0b5886f8bb19784d0ef11c5a7a86b6252 100644 --- a/drivers/crypto/atmel-sha204a.c +++ b/drivers/crypto/atmel-sha204a.c @@ -91,9 +91,9 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max, return max; } -static int atmel_sha204a_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int atmel_sha204a_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct atmel_i2c_client_priv *i2c_priv; int ret; @@ -107,7 +107,6 @@ static int atmel_sha204a_probe(struct i2c_client *client, i2c_priv->hwrng.name = dev_name(&client->dev); i2c_priv->hwrng.read = atmel_sha204a_rng_read; - i2c_priv->hwrng.quality = 1024; ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng); if (ret) @@ -143,7 +142,7 @@ static const struct i2c_device_id atmel_sha204a_id[] = { MODULE_DEVICE_TABLE(i2c, atmel_sha204a_id); static struct i2c_driver atmel_sha204a_driver = { - .probe = atmel_sha204a_probe, + .probe_new = atmel_sha204a_probe, .remove = atmel_sha204a_remove, .id_table = atmel_sha204a_id, diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c index 6345c7269eb030393dc45459786d21bb948e176f..1f65df48984783a992d8e6d77dcdd7e6d803ff0d 100644 --- a/drivers/crypto/caam/blob_gen.c +++ b/drivers/crypto/caam/blob_gen.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) "caam blob_gen: " fmt +#include #include #include @@ -61,12 +62,14 @@ static void caam_blob_job_done(struct device *dev, u32 *desc, u32 err, void *con int caam_process_blob(struct caam_blob_priv *priv, struct caam_blob_info *info, bool encap) { + const struct caam_drv_private *ctrlpriv; struct caam_blob_job_result testres; struct device *jrdev = &priv->jrdev; dma_addr_t dma_in, dma_out; int op = OP_PCLID_BLOB; size_t output_len; u32 *desc; + u32 moo; int ret; if (info->key_mod_len > CAAM_BLOB_KEYMOD_LENGTH) @@ -100,6 +103,12 @@ int caam_process_blob(struct caam_blob_priv *priv, goto out_unmap_in; } + ctrlpriv = dev_get_drvdata(jrdev->parent); + moo = FIELD_GET(CSTA_MOO, ioread32(&ctrlpriv->ctrl->perfmon.status)); + if (moo != CSTA_MOO_SECURE && moo != CSTA_MOO_TRUSTED) + dev_warn(jrdev, + "using insecure test key, enable HAB to use unique device key!\n"); + /* * A data blob is encrypted using a blob key (BK); a random number. * The BK is used as an AES-CCM key. The initial block (B0) and the diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index d3d8bb0a6990022f874820c6bbbe73264df277a5..ecc15bc521db1c63702cae077f183e6b9bdbc970 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -131,7 +131,7 @@ struct caam_aead_req_ctx { static int aead_null_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 *desc; @@ -184,7 +184,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), struct caam_aead_alg, aead); unsigned int ivsize = crypto_aead_ivsize(aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 ctx1_iv_off = 0; @@ -312,7 +312,7 @@ skip_givenc: static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); ctx->authsize = authsize; aead_set_sh_desc(authenc); @@ -322,7 +322,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -372,7 +372,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_gcm_check_authsize(authsize); @@ -387,7 +387,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int rfc4106_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -440,7 +440,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) static int rfc4106_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_rfc4106_check_authsize(authsize); @@ -455,7 +455,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -508,7 +508,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) static int rfc4543_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); if (authsize != 16) return -EINVAL; @@ -521,7 +521,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, static int chachapoly_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc; @@ -547,7 +547,7 @@ static int chachapoly_set_sh_desc(struct crypto_aead *aead) static int chachapoly_setauthsize(struct crypto_aead *aead, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); if (authsize != POLY1305_DIGEST_SIZE) return -EINVAL; @@ -559,7 +559,7 @@ static int chachapoly_setauthsize(struct crypto_aead *aead, static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize; @@ -575,7 +575,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); struct crypto_authenc_keys keys; @@ -656,7 +656,7 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key, static int gcm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int err; @@ -677,7 +677,7 @@ static int gcm_setkey(struct crypto_aead *aead, static int rfc4106_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int err; @@ -703,7 +703,7 @@ static int rfc4106_setkey(struct crypto_aead *aead, static int rfc4543_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int err; @@ -729,7 +729,7 @@ static int rfc4543_setkey(struct crypto_aead *aead, static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen, const u32 ctx1_iv_off) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_alg *alg = container_of(crypto_skcipher_alg(skcipher), typeof(*alg), skcipher); @@ -832,7 +832,7 @@ static int des3_skcipher_setkey(struct crypto_skcipher *skcipher, static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); u32 *desc; @@ -1057,7 +1057,7 @@ static void init_aead_job(struct aead_request *req, bool all_contig, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int authsize = ctx->authsize; u32 *desc = edesc->hw_desc; u32 out_options, in_options; @@ -1118,7 +1118,7 @@ static void init_gcm_job(struct aead_request *req, bool all_contig, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); u32 *desc = edesc->hw_desc; bool generic_gcm = (ivsize == GCM_AES_IV_SIZE); @@ -1185,7 +1185,7 @@ static void init_authenc_job(struct aead_request *req, struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), struct caam_aead_alg, aead); unsigned int ivsize = crypto_aead_ivsize(aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) == OP_ALG_AAI_CTR_MOD128); @@ -1234,7 +1234,7 @@ static void init_skcipher_job(struct skcipher_request *req, const bool encrypt) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; int ivsize = crypto_skcipher_ivsize(skcipher); u32 *desc = edesc->hw_desc; @@ -1290,7 +1290,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_aead_req_ctx *rctx = aead_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? @@ -1457,7 +1457,7 @@ static inline int chachapoly_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; bool all_contig; u32 *desc; @@ -1491,7 +1491,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; bool all_contig; @@ -1524,7 +1524,7 @@ static int aead_decrypt(struct aead_request *req) static int aead_do_one_req(struct crypto_engine *engine, void *areq) { struct aead_request *req = aead_request_cast(areq); - struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req)); + struct caam_ctx *ctx = crypto_aead_ctx_dma(crypto_aead_reqtfm(req)); struct caam_aead_req_ctx *rctx = aead_request_ctx(req); u32 *desc = rctx->edesc->hw_desc; int ret; @@ -1550,7 +1550,7 @@ static inline int gcm_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; bool all_contig; @@ -1597,7 +1597,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, int desc_bytes) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); struct device *jrdev = ctx->jrdev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? @@ -1756,7 +1756,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, static int skcipher_do_one_req(struct crypto_engine *engine, void *areq) { struct skcipher_request *req = skcipher_request_cast(areq); - struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req)); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(crypto_skcipher_reqtfm(req)); struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req); u32 *desc = rctx->edesc->hw_desc; int ret; @@ -1790,7 +1790,7 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); @@ -3397,7 +3397,7 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK; int ret = 0; @@ -3434,7 +3434,7 @@ static int caam_aead_init(struct crypto_aead *tfm) struct aead_alg *alg = crypto_aead_alg(tfm); struct caam_aead_alg *caam_alg = container_of(alg, struct caam_aead_alg, aead); - struct caam_ctx *ctx = crypto_aead_ctx(tfm); + struct caam_ctx *ctx = crypto_aead_ctx_dma(tfm); crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx)); @@ -3454,7 +3454,7 @@ static void caam_exit_common(struct caam_ctx *ctx) static void caam_cra_exit(struct crypto_skcipher *tfm) { - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (ctx->fallback) crypto_free_skcipher(ctx->fallback); @@ -3463,7 +3463,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm) static void caam_aead_exit(struct crypto_aead *tfm) { - caam_exit_common(crypto_aead_ctx(tfm)); + caam_exit_common(crypto_aead_ctx_dma(tfm)); } void caam_algapi_exit(void) @@ -3491,7 +3491,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY); @@ -3505,7 +3505,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY; diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c index 189a7438b29c46166cc80985d04a4dc9efb830d9..c37b67be0492d734e7a27bb4376038aff82ffded 100644 --- a/drivers/crypto/caam/caamalg_qi.c +++ b/drivers/crypto/caam/caamalg_qi.c @@ -81,7 +81,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) { struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); u32 ctx1_iv_off = 0; u32 *nonce = NULL; @@ -184,7 +184,7 @@ skip_givenc: static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); ctx->authsize = authsize; aead_set_sh_desc(authenc); @@ -195,7 +195,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); struct crypto_authenc_keys keys; @@ -299,7 +299,7 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN - ctx->cdata.keylen; @@ -342,7 +342,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_gcm_check_authsize(authsize); @@ -358,7 +358,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int gcm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int ret; @@ -402,7 +402,7 @@ static int gcm_setkey(struct crypto_aead *aead, static int rfc4106_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN - ctx->cdata.keylen; @@ -446,7 +446,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) static int rfc4106_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_rfc4106_check_authsize(authsize); @@ -462,7 +462,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4106_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int ret; @@ -510,7 +510,7 @@ static int rfc4106_setkey(struct crypto_aead *aead, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN - ctx->cdata.keylen; @@ -554,7 +554,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) static int rfc4543_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); if (authsize != 16) return -EINVAL; @@ -568,7 +568,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, static int rfc4543_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *jrdev = ctx->jrdev; int ret; @@ -617,7 +617,7 @@ static int rfc4543_setkey(struct crypto_aead *aead, static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen, const u32 ctx1_iv_off) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_alg *alg = container_of(crypto_skcipher_alg(skcipher), typeof(*alg), skcipher); @@ -731,7 +731,7 @@ static int des_skcipher_setkey(struct crypto_skcipher *skcipher, static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); int ret = 0; @@ -915,7 +915,7 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status) struct aead_edesc *edesc; struct aead_request *aead_req = drv_req->app_ctx; struct crypto_aead *aead = crypto_aead_reqtfm(aead_req); - struct caam_ctx *caam_ctx = crypto_aead_ctx(aead); + struct caam_ctx *caam_ctx = crypto_aead_ctx_dma(aead); int ecode = 0; qidev = caam_ctx->qidev; @@ -937,7 +937,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); struct device *qidev = ctx->qidev; @@ -1157,7 +1157,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int ret; if (unlikely(caam_congested)) @@ -1207,7 +1207,7 @@ static void skcipher_done(struct caam_drv_req *drv_req, u32 status) struct skcipher_edesc *edesc; struct skcipher_request *req = drv_req->app_ctx; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *caam_ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *caam_ctx = crypto_skcipher_ctx_dma(skcipher); struct device *qidev = caam_ctx->qidev; int ivsize = crypto_skcipher_ivsize(skcipher); int ecode = 0; @@ -1245,7 +1245,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *qidev = ctx->qidev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; @@ -1405,7 +1405,7 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent); int ret; @@ -2491,7 +2491,7 @@ static int caam_cra_init(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK; int ret = 0; @@ -2524,7 +2524,7 @@ static int caam_aead_init(struct crypto_aead *tfm) struct aead_alg *alg = crypto_aead_alg(tfm); struct caam_aead_alg *caam_alg = container_of(alg, typeof(*caam_alg), aead); - struct caam_ctx *ctx = crypto_aead_ctx(tfm); + struct caam_ctx *ctx = crypto_aead_ctx_dma(tfm); return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp); } @@ -2542,7 +2542,7 @@ static void caam_exit_common(struct caam_ctx *ctx) static void caam_cra_exit(struct crypto_skcipher *tfm) { - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (ctx->fallback) crypto_free_skcipher(ctx->fallback); @@ -2551,7 +2551,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm) static void caam_aead_exit(struct crypto_aead *tfm) { - caam_exit_common(crypto_aead_ctx(tfm)); + caam_exit_common(crypto_aead_ctx_dma(tfm)); } void caam_qi_algapi_exit(void) @@ -2579,7 +2579,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY); @@ -2593,7 +2593,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY; diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c index 4482cb145d051d3304f58b722abcbc8cce8eac77..1b0dd742c53f98673eab633b911c3217f4bd6d17 100644 --- a/drivers/crypto/caam/caamalg_qi2.c +++ b/drivers/crypto/caam/caamalg_qi2.c @@ -134,12 +134,12 @@ static struct caam_request *to_caam_req(struct crypto_async_request *areq) { switch (crypto_tfm_alg_type(areq->tfm)) { case CRYPTO_ALG_TYPE_SKCIPHER: - return skcipher_request_ctx(skcipher_request_cast(areq)); + return skcipher_request_ctx_dma(skcipher_request_cast(areq)); case CRYPTO_ALG_TYPE_AEAD: - return aead_request_ctx(container_of(areq, struct aead_request, - base)); + return aead_request_ctx_dma( + container_of(areq, struct aead_request, base)); case CRYPTO_ALG_TYPE_AHASH: - return ahash_request_ctx(ahash_request_cast(areq)); + return ahash_request_ctx_dma(ahash_request_cast(areq)); default: return ERR_PTR(-EINVAL); } @@ -171,7 +171,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) { struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct device *dev = ctx->dev; struct dpaa2_caam_priv *priv = dev_get_drvdata(dev); @@ -276,7 +276,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); ctx->authsize = authsize; aead_set_sh_desc(authenc); @@ -287,7 +287,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int aead_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; struct crypto_authenc_keys keys; @@ -350,10 +350,10 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req, bool encrypt) { struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_request *req_ctx = aead_request_ctx(req); + struct caam_request *req_ctx = aead_request_ctx_dma(req); struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead), typeof(*alg), aead); struct device *dev = ctx->dev; @@ -587,7 +587,7 @@ skip_out_fle: static int chachapoly_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); struct device *dev = ctx->dev; struct caam_flc *flc; @@ -620,7 +620,7 @@ static int chachapoly_set_sh_desc(struct crypto_aead *aead) static int chachapoly_setauthsize(struct crypto_aead *aead, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); if (authsize != POLY1305_DIGEST_SIZE) return -EINVAL; @@ -632,7 +632,7 @@ static int chachapoly_setauthsize(struct crypto_aead *aead, static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); unsigned int ivsize = crypto_aead_ivsize(aead); unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize; @@ -647,7 +647,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key, static int gcm_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_flc *flc; @@ -704,7 +704,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead) static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_gcm_check_authsize(authsize); @@ -720,7 +720,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize) static int gcm_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; int ret; @@ -739,7 +739,7 @@ static int gcm_setkey(struct crypto_aead *aead, static int rfc4106_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_flc *flc; @@ -799,7 +799,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead) static int rfc4106_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); int err; err = crypto_rfc4106_check_authsize(authsize); @@ -815,7 +815,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc, static int rfc4106_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; int ret; @@ -840,7 +840,7 @@ static int rfc4106_setkey(struct crypto_aead *aead, static int rfc4543_set_sh_desc(struct crypto_aead *aead) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; unsigned int ivsize = crypto_aead_ivsize(aead); struct caam_flc *flc; @@ -900,7 +900,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead) static int rfc4543_setauthsize(struct crypto_aead *authenc, unsigned int authsize) { - struct caam_ctx *ctx = crypto_aead_ctx(authenc); + struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc); if (authsize != 16) return -EINVAL; @@ -914,7 +914,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc, static int rfc4543_setkey(struct crypto_aead *aead, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); struct device *dev = ctx->dev; int ret; @@ -940,7 +940,7 @@ static int rfc4543_setkey(struct crypto_aead *aead, static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen, const u32 ctx1_iv_off) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct caam_skcipher_alg *alg = container_of(crypto_skcipher_alg(skcipher), struct caam_skcipher_alg, skcipher); @@ -1059,7 +1059,7 @@ static int des3_skcipher_setkey(struct crypto_skcipher *skcipher, static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, unsigned int keylen) { - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *dev = ctx->dev; struct dpaa2_caam_priv *priv = dev_get_drvdata(dev); struct caam_flc *flc; @@ -1109,10 +1109,10 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key, static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req) { struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_request *req_ctx = skcipher_request_ctx(req); + struct caam_request *req_ctx = skcipher_request_ctx_dma(req); struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct device *dev = ctx->dev; gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; @@ -1286,7 +1286,7 @@ static void aead_encrypt_done(void *cbk_ctx, u32 status) struct caam_request *req_ctx = to_caam_req(areq); struct aead_edesc *edesc = req_ctx->edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -1307,7 +1307,7 @@ static void aead_decrypt_done(void *cbk_ctx, u32 status) struct caam_request *req_ctx = to_caam_req(areq); struct aead_edesc *edesc = req_ctx->edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -1324,8 +1324,8 @@ static int aead_encrypt(struct aead_request *req) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct caam_request *caam_req = aead_request_ctx(req); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); + struct caam_request *caam_req = aead_request_ctx_dma(req); int ret; /* allocate extended descriptor */ @@ -1352,8 +1352,8 @@ static int aead_decrypt(struct aead_request *req) { struct aead_edesc *edesc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct caam_ctx *ctx = crypto_aead_ctx(aead); - struct caam_request *caam_req = aead_request_ctx(req); + struct caam_ctx *ctx = crypto_aead_ctx_dma(aead); + struct caam_request *caam_req = aead_request_ctx_dma(req); int ret; /* allocate extended descriptor */ @@ -1392,7 +1392,7 @@ static void skcipher_encrypt_done(void *cbk_ctx, u32 status) struct skcipher_request *req = skcipher_request_cast(areq); struct caam_request *req_ctx = to_caam_req(areq); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct skcipher_edesc *edesc = req_ctx->edesc; int ecode = 0; int ivsize = crypto_skcipher_ivsize(skcipher); @@ -1430,7 +1430,7 @@ static void skcipher_decrypt_done(void *cbk_ctx, u32 status) struct skcipher_request *req = skcipher_request_cast(areq); struct caam_request *req_ctx = to_caam_req(areq); struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); struct skcipher_edesc *edesc = req_ctx->edesc; int ecode = 0; int ivsize = crypto_skcipher_ivsize(skcipher); @@ -1474,8 +1474,8 @@ static int skcipher_encrypt(struct skcipher_request *req) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct caam_request *caam_req = skcipher_request_ctx(req); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); + struct caam_request *caam_req = skcipher_request_ctx_dma(req); struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev); int ret; @@ -1524,8 +1524,8 @@ static int skcipher_decrypt(struct skcipher_request *req) { struct skcipher_edesc *edesc; struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req); - struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher); - struct caam_request *caam_req = skcipher_request_ctx(req); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher); + struct caam_request *caam_req = skcipher_request_ctx_dma(req); struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev); int ret; @@ -1603,7 +1603,7 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm) struct skcipher_alg *alg = crypto_skcipher_alg(tfm); struct caam_skcipher_alg *caam_alg = container_of(alg, typeof(*caam_alg), skcipher); - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK; int ret = 0; @@ -1621,10 +1621,12 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm) } ctx->fallback = fallback; - crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_request) + - crypto_skcipher_reqsize(fallback)); + crypto_skcipher_set_reqsize_dma( + tfm, sizeof(struct caam_request) + + crypto_skcipher_reqsize(fallback)); } else { - crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_request)); + crypto_skcipher_set_reqsize_dma(tfm, + sizeof(struct caam_request)); } ret = caam_cra_init(ctx, &caam_alg->caam, false); @@ -1640,8 +1642,8 @@ static int caam_cra_init_aead(struct crypto_aead *tfm) struct caam_aead_alg *caam_alg = container_of(alg, typeof(*caam_alg), aead); - crypto_aead_set_reqsize(tfm, sizeof(struct caam_request)); - return caam_cra_init(crypto_aead_ctx(tfm), &caam_alg->caam, + crypto_aead_set_reqsize_dma(tfm, sizeof(struct caam_request)); + return caam_cra_init(crypto_aead_ctx_dma(tfm), &caam_alg->caam, !caam_alg->caam.nodkp); } @@ -1654,7 +1656,7 @@ static void caam_exit_common(struct caam_ctx *ctx) static void caam_cra_exit(struct crypto_skcipher *tfm) { - struct caam_ctx *ctx = crypto_skcipher_ctx(tfm); + struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (ctx->fallback) crypto_free_skcipher(ctx->fallback); @@ -1663,7 +1665,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm) static void caam_cra_exit_aead(struct crypto_aead *tfm) { - caam_exit_common(crypto_aead_ctx(tfm)); + caam_exit_common(crypto_aead_ctx_dma(tfm)); } static struct caam_skcipher_alg driver_algs[] = { @@ -3008,7 +3010,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY); @@ -3022,7 +3024,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg) alg->base.cra_module = THIS_MODULE; alg->base.cra_priority = CAAM_CRA_PRIORITY; - alg->base.cra_ctxsize = sizeof(struct caam_ctx); + alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding(); alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY | CRYPTO_ALG_KERN_DRIVER_ONLY; @@ -3132,7 +3134,7 @@ static inline int ctx_map_to_qm_sg(struct device *dev, static int ahash_set_sh_desc(struct crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev); struct caam_flc *flc; @@ -3305,7 +3307,7 @@ err_flc: static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); unsigned int blocksize = crypto_tfm_alg_blocksize(&ahash->base); unsigned int digestsize = crypto_ahash_digestsize(ahash); int ret; @@ -3356,7 +3358,7 @@ bad_free_key: static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (edesc->src_nents) dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); @@ -3376,7 +3378,7 @@ static inline void ahash_unmap_ctx(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, u32 flag) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (state->ctx_dma) { dma_unmap_single(dev, state->ctx_dma, state->ctx_dma_len, flag); @@ -3390,9 +3392,9 @@ static void ahash_done(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); int ecode = 0; @@ -3417,9 +3419,9 @@ static void ahash_done_bi(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -3455,9 +3457,9 @@ static void ahash_done_ctx_src(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); int ecode = 0; @@ -3482,9 +3484,9 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) struct crypto_async_request *areq = cbk_ctx; struct ahash_request *req = ahash_request_cast(areq); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->caam_req.edesc; - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int ecode = 0; dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status); @@ -3518,8 +3520,8 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status) static int ahash_update_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3637,8 +3639,8 @@ unmap_ctx: static int ahash_final_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3708,8 +3710,8 @@ unmap_ctx: static int ahash_finup_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3802,8 +3804,8 @@ unmap_ctx: static int ahash_digest(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3897,8 +3899,8 @@ unmap: static int ahash_final_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -3970,8 +3972,8 @@ unmap: static int ahash_update_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -4091,8 +4093,8 @@ unmap_ctx: static int ahash_finup_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -4187,8 +4189,8 @@ unmap: static int ahash_update_first(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_request *req_ctx = &state->caam_req; struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1]; struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0]; @@ -4320,7 +4322,7 @@ static int ahash_finup_first(struct ahash_request *req) static int ahash_init(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); state->update = ahash_update_first; state->finup = ahash_finup_first; @@ -4337,28 +4339,28 @@ static int ahash_init(struct ahash_request *req) static int ahash_update(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->update(req); } static int ahash_finup(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->finup(req); } static int ahash_final(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->final(req); } static int ahash_export(struct ahash_request *req, void *out) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_export_state *export = out; u8 *buf = state->buf; int len = state->buflen; @@ -4375,7 +4377,7 @@ static int ahash_export(struct ahash_request *req, void *out) static int ahash_import(struct ahash_request *req, const void *in) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); const struct caam_export_state *export = in; memset(state, 0, sizeof(*state)); @@ -4547,7 +4549,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) container_of(halg, struct ahash_alg, halg); struct caam_hash_alg *caam_hash = container_of(alg, struct caam_hash_alg, ahash_alg); - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); /* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */ static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE, HASH_MSG_LEN + SHA1_DIGEST_SIZE, @@ -4594,8 +4596,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) OP_ALG_ALGSEL_SUBMASK) >> OP_ALG_ALGSEL_SHIFT]; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct caam_hash_state)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct caam_hash_state)); /* * For keyed hash algorithms shared descriptors @@ -4606,7 +4607,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) static void caam_hash_cra_exit(struct crypto_tfm *tfm) { - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0], sizeof(ctx->flc), DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC); @@ -4646,7 +4647,7 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev, alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; alg->cra_exit = caam_hash_cra_exit; - alg->cra_ctxsize = sizeof(struct caam_hash_ctx); + alg->cra_ctxsize = sizeof(struct caam_hash_ctx) + crypto_dma_padding(); alg->cra_priority = CAAM_CRA_PRIORITY; alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 36ef738e4a181960b2c5e786e3933f478d50d0cd..1050e965a43857c9bf0f73b2802a050ab0f6adb0 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -199,7 +199,7 @@ static inline int ctx_map_to_sec4_sg(struct device *jrdev, static int ahash_set_sh_desc(struct crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent); @@ -255,7 +255,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash) static int axcbc_set_sh_desc(struct crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; u32 *desc; @@ -307,7 +307,7 @@ static int axcbc_set_sh_desc(struct crypto_ahash *ahash) static int acmac_set_sh_desc(struct crypto_ahash *ahash) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int digestsize = crypto_ahash_digestsize(ahash); struct device *jrdev = ctx->jrdev; u32 *desc; @@ -421,7 +421,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key, static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *jrdev = ctx->jrdev; int blocksize = crypto_tfm_alg_blocksize(&ahash->base); int digestsize = crypto_ahash_digestsize(ahash); @@ -484,7 +484,7 @@ static int ahash_setkey(struct crypto_ahash *ahash, static int axcbc_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *jrdev = ctx->jrdev; if (keylen != AES_KEYSIZE_128) @@ -504,7 +504,7 @@ static int axcbc_setkey(struct crypto_ahash *ahash, const u8 *key, static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int err; err = aes_check_keylen(keylen); @@ -543,7 +543,7 @@ static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, int dst_len) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (edesc->src_nents) dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); @@ -563,7 +563,7 @@ static inline void ahash_unmap_ctx(struct device *dev, struct ahash_edesc *edesc, struct ahash_request *req, int dst_len, u32 flag) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); if (state->ctx_dma) { dma_unmap_single(dev, state->ctx_dma, state->ctx_dma_len, flag); @@ -580,8 +580,8 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err, struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); int digestsize = crypto_ahash_digestsize(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); int ecode = 0; bool has_bklog; @@ -630,8 +630,8 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err, struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev); struct ahash_edesc *edesc; struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); int digestsize = crypto_ahash_digestsize(ahash); int ecode = 0; bool has_bklog; @@ -695,8 +695,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req, dma_addr_t sh_desc_dma) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; struct ahash_edesc *edesc; @@ -755,8 +755,8 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx, static int ahash_do_one_req(struct crypto_engine *engine, void *areq) { struct ahash_request *req = ahash_request_cast(areq); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req)); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(crypto_ahash_reqtfm(req)); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u32 *desc = state->edesc->hw_desc; int ret; @@ -785,7 +785,7 @@ static int ahash_enqueue_req(struct device *jrdev, int dst_len, enum dma_data_direction dir) { struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct ahash_edesc *edesc = state->edesc; u32 *desc = edesc->hw_desc; int ret; @@ -815,8 +815,8 @@ static int ahash_enqueue_req(struct device *jrdev, static int ahash_update_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int *buflen = &state->buflen; @@ -940,8 +940,8 @@ unmap_ctx: static int ahash_final_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; int buflen = state->buflen; u32 *desc; @@ -1001,8 +1001,8 @@ static int ahash_final_ctx(struct ahash_request *req) static int ahash_finup_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; int buflen = state->buflen; u32 *desc; @@ -1075,8 +1075,8 @@ static int ahash_finup_ctx(struct ahash_request *req) static int ahash_digest(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u32 *desc; int digestsize = crypto_ahash_digestsize(ahash); @@ -1142,8 +1142,8 @@ static int ahash_digest(struct ahash_request *req) static int ahash_final_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int buflen = state->buflen; @@ -1191,8 +1191,8 @@ static int ahash_final_no_ctx(struct ahash_request *req) static int ahash_update_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int *buflen = &state->buflen; @@ -1312,8 +1312,8 @@ static int ahash_update_no_ctx(struct ahash_request *req) static int ahash_finup_no_ctx(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; int buflen = state->buflen; u32 *desc; @@ -1388,8 +1388,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req) static int ahash_update_first(struct ahash_request *req) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct device *jrdev = ctx->jrdev; u8 *buf = state->buf; int *buflen = &state->buflen; @@ -1498,7 +1498,7 @@ static int ahash_finup_first(struct ahash_request *req) static int ahash_init(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); state->update = ahash_update_first; state->finup = ahash_finup_first; @@ -1515,28 +1515,28 @@ static int ahash_init(struct ahash_request *req) static int ahash_update(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->update(req); } static int ahash_finup(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->finup(req); } static int ahash_final(struct ahash_request *req) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); return state->final(req); } static int ahash_export(struct ahash_request *req, void *out) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); struct caam_export_state *export = out; u8 *buf = state->buf; int len = state->buflen; @@ -1553,7 +1553,7 @@ static int ahash_export(struct ahash_request *req, void *out) static int ahash_import(struct ahash_request *req, const void *in) { - struct caam_hash_state *state = ahash_request_ctx(req); + struct caam_hash_state *state = ahash_request_ctx_dma(req); const struct caam_export_state *export = in; memset(state, 0, sizeof(*state)); @@ -1762,7 +1762,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) container_of(halg, struct ahash_alg, halg); struct caam_hash_alg *caam_hash = container_of(alg, struct caam_hash_alg, ahash_alg); - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); /* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */ static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE, HASH_MSG_LEN + SHA1_DIGEST_SIZE, @@ -1854,8 +1854,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) ctx->enginectx.op.do_one_request = ahash_do_one_req; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct caam_hash_state)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct caam_hash_state)); /* * For keyed hash algorithms shared descriptors @@ -1866,7 +1865,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm) static void caam_hash_cra_exit(struct crypto_tfm *tfm) { - struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma, offsetof(struct caam_hash_ctx, key) - @@ -1926,7 +1925,7 @@ caam_hash_alloc(struct caam_hash_template *template, alg->cra_module = THIS_MODULE; alg->cra_init = caam_hash_cra_init; alg->cra_exit = caam_hash_cra_exit; - alg->cra_ctxsize = sizeof(struct caam_hash_ctx); + alg->cra_ctxsize = sizeof(struct caam_hash_ctx) + crypto_dma_padding(); alg->cra_priority = CAAM_CRA_PRIORITY; alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 88672757671012023ff08c29cde3ea61df066cba..aef031946f337b65dbca4b045ecb21e40efef318 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -57,7 +57,7 @@ static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_pub_pdb *pdb = &edesc->pdb.pub; @@ -69,7 +69,7 @@ static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; @@ -81,7 +81,7 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2; size_t p_sz = key->p_sz; @@ -98,7 +98,7 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3; size_t p_sz = key->p_sz; @@ -149,7 +149,7 @@ static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err, struct akcipher_request *req = context; struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct caam_drv_private_jr *jrp = dev_get_drvdata(dev); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc; @@ -242,7 +242,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, size_t desclen) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *dev = ctx->dev; struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct caam_rsa_key *key = &ctx->key; @@ -371,7 +371,7 @@ static int akcipher_do_one_req(struct crypto_engine *engine, void *areq) base); struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; u32 *desc = req_ctx->edesc->hw_desc; int ret; @@ -399,7 +399,7 @@ static int set_rsa_pub_pdb(struct akcipher_request *req, { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_pub_pdb *pdb = &edesc->pdb.pub; @@ -444,7 +444,7 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; @@ -491,7 +491,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2; @@ -568,7 +568,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req, struct rsa_edesc *edesc) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *dev = ctx->dev; struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3; @@ -664,7 +664,7 @@ static int akcipher_enqueue_req(struct device *jrdev, { struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev); struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req); struct rsa_edesc *edesc = req_ctx->edesc; @@ -707,7 +707,7 @@ static int akcipher_enqueue_req(struct device *jrdev, static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; @@ -746,7 +746,7 @@ init_fail: static int caam_rsa_dec_priv_f1(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; int ret; @@ -775,7 +775,7 @@ init_fail: static int caam_rsa_dec_priv_f2(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; int ret; @@ -804,7 +804,7 @@ init_fail: static int caam_rsa_dec_priv_f3(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; int ret; @@ -833,7 +833,7 @@ init_fail: static int caam_rsa_dec(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; int ret; @@ -936,7 +936,7 @@ static int caam_rsa_check_key_length(unsigned int len) static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int ret; @@ -1038,7 +1038,7 @@ free_p: static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct rsa_key raw_key = {NULL}; struct caam_rsa_key *rsa_key = &ctx->key; int ret; @@ -1089,7 +1089,7 @@ err: static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); return ctx->key.n_sz; } @@ -1097,7 +1097,9 @@ static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm) /* Per session pkc's driver context creation function */ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); + + akcipher_set_reqsize(tfm, sizeof(struct caam_rsa_req_ctx)); ctx->dev = caam_jr_alloc(); @@ -1123,7 +1125,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) /* Per session pkc's driver context cleanup function */ static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm) { - struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct caam_rsa_key *key = &ctx->key; dma_unmap_single(ctx->dev, ctx->padding_dma, CAAM_RSA_MAX_INPUT_SIZE - @@ -1141,13 +1143,13 @@ static struct caam_akcipher_alg caam_rsa = { .max_size = caam_rsa_max_size, .init = caam_rsa_init_tfm, .exit = caam_rsa_exit_tfm, - .reqsize = sizeof(struct caam_rsa_req_ctx), .base = { .cra_name = "rsa", .cra_driver_name = "rsa-caam", .cra_priority = 3000, .cra_module = THIS_MODULE, - .cra_ctxsize = sizeof(struct caam_rsa_ctx), + .cra_ctxsize = sizeof(struct caam_rsa_ctx) + + CRYPTO_DMA_PADDING, }, } }; diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 77d048dfe5d061923a1e1422c00a525aa10c5908..1f0e820509767358b4bda23ff3f6647848d8865c 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -246,7 +246,6 @@ int caam_rng_init(struct device *ctrldev) ctx->rng.cleanup = caam_cleanup; ctx->rng.read = caam_read; ctx->rng.priv = (unsigned long)ctx; - ctx->rng.quality = 1024; dev_info(ctrldev, "registering rng-caam\n"); diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 66d6dad841bb2aa5d4f2871c4dfe990cbd9e60f0..66928f8a0c4b1e8baa4507eb70a1b5e6bce41c33 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -426,6 +426,9 @@ struct caam_perfmon { u32 rsvd2; #define CSTA_PLEND BIT(10) #define CSTA_ALT_PLEND BIT(18) +#define CSTA_MOO GENMASK(9, 8) +#define CSTA_MOO_SECURE 1 +#define CSTA_MOO_TRUSTED 2 u32 status; /* CSTA - CAAM Status */ u64 rsvd3; diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c index ce3b91c612f0f83db1c18ee8645a0b746d813e38..9eca0c30218669e64b8a6e83386ce587aace4169 100644 --- a/drivers/crypto/cavium/cpt/cptvf_algs.c +++ b/drivers/crypto/cavium/cpt/cptvf_algs.c @@ -97,7 +97,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(tfm); - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); struct fc_context *fctx = &rctx->fctx; u32 enc_iv_len = crypto_skcipher_ivsize(tfm); struct cpt_request_info *req_info = &rctx->cpt_req; @@ -151,7 +151,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, static inline u32 create_input_list(struct skcipher_request *req, u32 enc, u32 enc_iv_len) { - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); struct cpt_request_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -173,7 +173,7 @@ static inline void store_cb_info(struct skcipher_request *req, static inline void create_output_list(struct skcipher_request *req, u32 enc_iv_len) { - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); struct cpt_request_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -193,7 +193,7 @@ static inline void create_output_list(struct skcipher_request *req, static inline int cvm_enc_dec(struct skcipher_request *req, u32 enc) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct cvm_req_ctx *rctx = skcipher_request_ctx(req); + struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req); u32 enc_iv_len = crypto_skcipher_ivsize(tfm); struct fc_context *fctx = &rctx->fctx; struct cpt_request_info *req_info = &rctx->cpt_req; @@ -335,7 +335,7 @@ static int cvm_ecb_des3_setkey(struct crypto_skcipher *cipher, const u8 *key, static int cvm_enc_dec_init(struct crypto_skcipher *tfm) { - crypto_skcipher_set_reqsize(tfm, sizeof(struct cvm_req_ctx)); + crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct cvm_req_ctx)); return 0; } diff --git a/drivers/crypto/cavium/nitrox/nitrox_aead.c b/drivers/crypto/cavium/nitrox/nitrox_aead.c index c93c4e41d267870d3e341b4821a6017c405772e6..0653484df23f70a5b0b015a697376bc0e2e4ddf3 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_aead.c +++ b/drivers/crypto/cavium/nitrox/nitrox_aead.c @@ -392,7 +392,7 @@ static int nitrox_rfc4106_setauthsize(struct crypto_aead *aead, static int nitrox_rfc4106_set_aead_rctx_sglist(struct aead_request *areq) { - struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); + struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq); struct nitrox_aead_rctx *aead_rctx = &rctx->base; unsigned int assoclen = areq->assoclen - GCM_RFC4106_IV_SIZE; struct scatterlist *sg; @@ -424,7 +424,7 @@ static int nitrox_rfc4106_set_aead_rctx_sglist(struct aead_request *areq) static void nitrox_rfc4106_callback(void *arg, int err) { struct aead_request *areq = arg; - struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); + struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq); struct nitrox_kcrypt_request *nkreq = &rctx->base.nkreq; free_src_sglist(nkreq); @@ -441,7 +441,7 @@ static int nitrox_rfc4106_enc(struct aead_request *areq) { struct crypto_aead *aead = crypto_aead_reqtfm(areq); struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); - struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); + struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq); struct nitrox_aead_rctx *aead_rctx = &rctx->base; struct se_crypto_request *creq = &aead_rctx->nkreq.creq; int ret; @@ -472,7 +472,7 @@ static int nitrox_rfc4106_enc(struct aead_request *areq) static int nitrox_rfc4106_dec(struct aead_request *areq) { struct crypto_aead *aead = crypto_aead_reqtfm(areq); - struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead); + struct nitrox_crypto_ctx *nctx = crypto_aead_ctx_dma(aead); struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq); struct nitrox_aead_rctx *aead_rctx = &rctx->base; struct se_crypto_request *creq = &aead_rctx->nkreq.creq; @@ -510,8 +510,8 @@ static int nitrox_rfc4106_init(struct crypto_aead *aead) if (ret) return ret; - crypto_aead_set_reqsize(aead, sizeof(struct aead_request) + - sizeof(struct nitrox_rfc4106_rctx)); + crypto_aead_set_reqsize_dma(aead, sizeof(struct aead_request) + + sizeof(struct nitrox_rfc4106_rctx)); return 0; } diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c index 9e7308e39b304c80d3c67dca856c836d1bf229dd..d4e06999af9b7e3ff6d5b5d80d82e92e277e8212 100644 --- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c +++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c @@ -195,6 +195,7 @@ int nitrox_mbox_init(struct nitrox_device *ndev) ndev->iov.pf2vf_wq = alloc_workqueue("nitrox_pf2vf", 0, 0); if (!ndev->iov.pf2vf_wq) { kfree(ndev->iov.vfdev); + ndev->iov.vfdev = NULL; return -ENOMEM; } /* enable pf2vf mailbox interrupts */ diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c index 11a305fa19e67c29875de647d70bde87bafe9136..d8426bdf319082720fc7b4b18f88e71ec37c0fa0 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c @@ -25,7 +25,7 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req, { struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); unsigned int digest_size = crypto_ahash_digestsize(tfm); if (ret) @@ -56,8 +56,8 @@ static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes, unsigned int final) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); struct scatterlist *sg, *cmac_key_sg = NULL; unsigned int block_size = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); @@ -182,7 +182,7 @@ e_free: static int ccp_aes_cmac_init(struct ahash_request *req) { - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); memset(rctx, 0, sizeof(*rctx)); @@ -219,7 +219,7 @@ static int ccp_aes_cmac_digest(struct ahash_request *req) static int ccp_aes_cmac_export(struct ahash_request *req, void *out) { - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_aes_cmac_exp_ctx state; /* Don't let anything leak to 'out' */ @@ -238,7 +238,7 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out) static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) { - struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_aes_cmac_exp_ctx state; /* 'in' may not be aligned so memcpy to local variable */ @@ -256,7 +256,7 @@ static int ccp_aes_cmac_import(struct ahash_request *req, const void *in) static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo; @@ -334,13 +334,14 @@ static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key, static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); ctx->complete = ccp_aes_cmac_complete; ctx->u.aes.key_len = 0; - crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx)); + crypto_ahash_set_reqsize_dma(ahash, + sizeof(struct ccp_aes_cmac_req_ctx)); return 0; } @@ -382,7 +383,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head) CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK; base->cra_blocksize = AES_BLOCK_SIZE; - base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_ctxsize = sizeof(struct ccp_ctx) + crypto_dma_padding(); base->cra_priority = CCP_CRA_PRIORITY; base->cra_init = ccp_aes_cmac_cra_init; base->cra_module = THIS_MODULE; diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c index 1c1c939f5c39bb93c7fb3cfe3c9d18948a90b46b..b1dbb8cea559e82f3bca9000385471062fb42334 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c @@ -29,7 +29,7 @@ static int ccp_aes_gcm_complete(struct crypto_async_request *async_req, int ret) static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_aead_ctx(tfm); + struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm); switch (key_len) { case AES_KEYSIZE_128: @@ -76,8 +76,8 @@ static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm, static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct ccp_ctx *ctx = crypto_aead_ctx(tfm); - struct ccp_aes_req_ctx *rctx = aead_request_ctx(req); + struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = aead_request_ctx_dma(req); struct scatterlist *iv_sg = NULL; unsigned int iv_len = 0; int i; @@ -148,12 +148,12 @@ static int ccp_aes_gcm_decrypt(struct aead_request *req) static int ccp_aes_gcm_cra_init(struct crypto_aead *tfm) { - struct ccp_ctx *ctx = crypto_aead_ctx(tfm); + struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm); ctx->complete = ccp_aes_gcm_complete; ctx->u.aes.key_len = 0; - crypto_aead_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct ccp_aes_req_ctx)); return 0; } @@ -176,7 +176,7 @@ static struct aead_alg ccp_aes_gcm_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct ccp_ctx), + .cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .cra_priority = CCP_CRA_PRIORITY, .cra_exit = ccp_aes_gcm_cra_exit, .cra_module = THIS_MODULE, diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 6849261ca47d037c7dc9f2c78e6a0cf9d25c09d5..93f735d6b02b280f9a65cf2c392305112e3701af 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -62,7 +62,7 @@ static struct ccp_unit_size_map xts_unit_sizes[] = { static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); if (ret) return ret; @@ -75,7 +75,7 @@ static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret) static int ccp_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); unsigned int ccpversion = ccp_version(); int ret; @@ -105,8 +105,8 @@ static int ccp_aes_xts_crypt(struct skcipher_request *req, unsigned int encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); unsigned int ccpversion = ccp_version(); unsigned int fallback = 0; unsigned int unit; @@ -196,7 +196,7 @@ static int ccp_aes_xts_decrypt(struct skcipher_request *req) static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); struct crypto_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; @@ -210,15 +210,16 @@ static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm) } ctx->u.aes.tfm_skcipher = fallback_tfm; - crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx) + - crypto_skcipher_reqsize(fallback_tfm)); + crypto_skcipher_set_reqsize_dma(tfm, + sizeof(struct ccp_aes_req_ctx) + + crypto_skcipher_reqsize(fallback_tfm)); return 0; } static void ccp_aes_xts_exit_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); crypto_free_skcipher(ctx->u.aes.tfm_skcipher); } @@ -246,7 +247,8 @@ static int ccp_register_aes_xts_alg(struct list_head *head, CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK; alg->base.cra_blocksize = AES_BLOCK_SIZE; - alg->base.cra_ctxsize = sizeof(struct ccp_ctx); + alg->base.cra_ctxsize = sizeof(struct ccp_ctx) + + crypto_dma_padding(); alg->base.cra_priority = CCP_CRA_PRIORITY; alg->base.cra_module = THIS_MODULE; diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c index bed331953ff94ce4ae59bb4c711d048cd0d577bc..918e223f21b651f80bc0e031d487c07d98f85704 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes.c +++ b/drivers/crypto/ccp/ccp-crypto-aes.c @@ -22,8 +22,9 @@ static int ccp_aes_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma( + crypto_skcipher_reqtfm(req)); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); if (ret) return ret; @@ -38,7 +39,7 @@ static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); switch (key_len) { case AES_KEYSIZE_128: @@ -65,8 +66,8 @@ static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); struct scatterlist *iv_sg = NULL; unsigned int iv_len = 0; @@ -118,7 +119,7 @@ static int ccp_aes_decrypt(struct skcipher_request *req) static int ccp_aes_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); ctx->complete = ccp_aes_complete; ctx->u.aes.key_len = 0; @@ -132,7 +133,7 @@ static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); /* Restore the original pointer */ req->iv = rctx->rfc3686_info; @@ -143,7 +144,7 @@ static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req, static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); if (key_len < CTR_RFC3686_NONCE_SIZE) return -EINVAL; @@ -157,8 +158,8 @@ static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ccp_aes_rfc3686_crypt(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req); u8 *iv; /* Initialize the CTR block */ @@ -190,12 +191,12 @@ static int ccp_aes_rfc3686_decrypt(struct skcipher_request *req) static int ccp_aes_rfc3686_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); ctx->complete = ccp_aes_rfc3686_complete; ctx->u.aes.key_len = 0; - crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx)); + crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct ccp_aes_req_ctx)); return 0; } @@ -213,7 +214,7 @@ static const struct skcipher_alg ccp_aes_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .base.cra_priority = CCP_CRA_PRIORITY, .base.cra_module = THIS_MODULE, }; @@ -231,7 +232,7 @@ static const struct skcipher_alg ccp_aes_rfc3686_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = CTR_RFC3686_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .base.cra_priority = CCP_CRA_PRIORITY, .base.cra_module = THIS_MODULE, }; diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c index 278636ed251a5349c57faec8af89ff6048dfcf3d..afae30adb703f6a5cc3fd3bf3882881a8ea46300 100644 --- a/drivers/crypto/ccp/ccp-crypto-des3.c +++ b/drivers/crypto/ccp/ccp-crypto-des3.c @@ -21,8 +21,9 @@ static int ccp_des3_complete(struct crypto_async_request *async_req, int ret) { struct skcipher_request *req = skcipher_request_cast(async_req); - struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma( + crypto_skcipher_reqtfm(req)); + struct ccp_des3_req_ctx *rctx = skcipher_request_ctx_dma(req); if (ret) return ret; @@ -37,7 +38,7 @@ static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int key_len) { struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); int err; err = verify_skcipher_des3_key(tfm, key); @@ -60,8 +61,8 @@ static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key, static int ccp_des3_crypt(struct skcipher_request *req, bool encrypt) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); + struct ccp_des3_req_ctx *rctx = skcipher_request_ctx_dma(req); struct scatterlist *iv_sg = NULL; unsigned int iv_len = 0; @@ -114,12 +115,12 @@ static int ccp_des3_decrypt(struct skcipher_request *req) static int ccp_des3_init_tfm(struct crypto_skcipher *tfm) { - struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm); + struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm); ctx->complete = ccp_des3_complete; ctx->u.des3.key_len = 0; - crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_des3_req_ctx)); + crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct ccp_des3_req_ctx)); return 0; } @@ -137,7 +138,7 @@ static const struct skcipher_alg ccp_des3_defaults = { CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct ccp_ctx), + .base.cra_ctxsize = sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, .base.cra_priority = CCP_CRA_PRIORITY, .base.cra_module = THIS_MODULE, }; diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 5976530c00a8a16e90c420925bc90c2e8b792c5c..73442a382f683d2e277bc1f1f466e257996b0d87 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -78,13 +78,6 @@ struct ccp_crypto_cmd { int ret; }; -struct ccp_crypto_cpu { - struct work_struct work; - struct completion completion; - struct ccp_crypto_cmd *crypto_cmd; - int err; -}; - static inline bool ccp_crypto_success(int err) { if (err && (err != -EINPROGRESS) && (err != -EBUSY)) @@ -146,7 +139,7 @@ static void ccp_crypto_complete(void *data, int err) struct ccp_crypto_cmd *crypto_cmd = data; struct ccp_crypto_cmd *held, *next, *backlog; struct crypto_async_request *req = crypto_cmd->req; - struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(req->tfm); int ret; if (err == -EINPROGRESS) { @@ -190,7 +183,7 @@ static void ccp_crypto_complete(void *data, int err) break; /* Error occurred, report it and get the next entry */ - ctx = crypto_tfm_ctx(held->req->tfm); + ctx = crypto_tfm_ctx_dma(held->req->tfm); if (ctx->complete) ret = ctx->complete(held->req, ret); held->req->complete(held->req, ret); @@ -400,7 +393,7 @@ static void ccp_unregister_algs(void) } } -static int ccp_crypto_init(void) +static int __init ccp_crypto_init(void) { int ret; @@ -421,7 +414,7 @@ static int ccp_crypto_init(void) return ret; } -static void ccp_crypto_exit(void) +static void __exit ccp_crypto_exit(void) { ccp_unregister_algs(); } diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c index 1223ac70aea28714bb5cbbcbb773a231ef9cb176..a14f85512cf4f61a72870731dd7f5efc09b5ac12 100644 --- a/drivers/crypto/ccp/ccp-crypto-rsa.c +++ b/drivers/crypto/ccp/ccp-crypto-rsa.c @@ -44,7 +44,7 @@ static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen, static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret) { struct akcipher_request *req = akcipher_request_cast(async_req); - struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx_dma(req); if (ret) return ret; @@ -56,7 +56,7 @@ static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret) static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm) { - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); return ctx->u.rsa.n_len; } @@ -64,8 +64,8 @@ static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm) static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); - struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); + struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx_dma(req); int ret = 0; memset(&rctx->cmd, 0, sizeof(rctx->cmd)); @@ -126,7 +126,7 @@ static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx) static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key, unsigned int keylen, bool private) { - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); struct rsa_key raw_key; int ret; @@ -192,9 +192,9 @@ static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key, static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm) { - struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); - akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx)); + akcipher_set_reqsize_dma(tfm, sizeof(struct ccp_rsa_req_ctx)); ctx->complete = ccp_rsa_complete; return 0; @@ -202,7 +202,7 @@ static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm) static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base); + struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm); ccp_rsa_free_key_bufs(ctx); } @@ -220,7 +220,7 @@ static struct akcipher_alg ccp_rsa_defaults = { .cra_driver_name = "rsa-ccp", .cra_priority = CCP_CRA_PRIORITY, .cra_module = THIS_MODULE, - .cra_ctxsize = 2 * sizeof(struct ccp_ctx), + .cra_ctxsize = 2 * sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING, }, }; diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 74fa5360e7226d82615e16a515454c8400389f20..fa3ae8e78f6f3e412fe89b230dc56d393238688f 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -28,7 +28,7 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) { struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); unsigned int digest_size = crypto_ahash_digestsize(tfm); if (ret) @@ -59,8 +59,8 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, unsigned int final) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct scatterlist *sg; unsigned int block_size = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); @@ -182,8 +182,8 @@ e_free: static int ccp_sha_init(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm)); unsigned int block_size = @@ -231,7 +231,7 @@ static int ccp_sha_digest(struct ahash_request *req) static int ccp_sha_export(struct ahash_request *req, void *out) { - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_sha_exp_ctx state; /* Don't let anything leak to 'out' */ @@ -252,7 +252,7 @@ static int ccp_sha_export(struct ahash_request *req, void *out) static int ccp_sha_import(struct ahash_request *req, const void *in) { - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req); struct ccp_sha_exp_ctx state; /* 'in' may not be aligned so memcpy to local variable */ @@ -272,7 +272,7 @@ static int ccp_sha_import(struct ahash_request *req, const void *in) static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int key_len) { - struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct crypto_shash *shash = ctx->u.sha.hmac_tfm; unsigned int block_size = crypto_shash_blocksize(shash); unsigned int digest_size = crypto_shash_digestsize(shash); @@ -313,13 +313,13 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, static int ccp_sha_cra_init(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); + struct ccp_ctx *ctx = crypto_ahash_ctx_dma(ahash); ctx->complete = ccp_sha_complete; ctx->u.sha.key_len = 0; - crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct ccp_sha_req_ctx)); return 0; } @@ -330,7 +330,7 @@ static void ccp_sha_cra_exit(struct crypto_tfm *tfm) static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm); struct crypto_shash *hmac_tfm; @@ -348,7 +348,7 @@ static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) { - struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); + struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm); if (ctx->u.sha.hmac_tfm) crypto_free_shash(ctx->u.sha.hmac_tfm); @@ -492,7 +492,7 @@ static int ccp_register_sha_alg(struct list_head *head, CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_NEED_FALLBACK; base->cra_blocksize = def->block_size; - base->cra_ctxsize = sizeof(struct ccp_ctx); + base->cra_ctxsize = sizeof(struct ccp_ctx) + crypto_dma_padding(); base->cra_priority = CCP_CRA_PRIORITY; base->cra_init = ccp_sha_cra_init; base->cra_exit = ccp_sha_cra_exit; diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c index 792d6da7f0c07aea57f8b169ca6ce398813c090b..084d052fddccbd63514efc15d7a3b95ceffba139 100644 --- a/drivers/crypto/ccp/sp-pci.c +++ b/drivers/crypto/ccp/sp-pci.c @@ -381,6 +381,15 @@ static const struct psp_vdata pspv3 = { .inten_reg = 0x10690, .intsts_reg = 0x10694, }; + +static const struct psp_vdata pspv4 = { + .sev = &sevv2, + .tee = &teev1, + .feature_reg = 0x109fc, + .inten_reg = 0x10690, + .intsts_reg = 0x10694, +}; + #endif static const struct sp_dev_vdata dev_vdata[] = { @@ -426,7 +435,7 @@ static const struct sp_dev_vdata dev_vdata[] = { { /* 5 */ .bar = 2, #ifdef CONFIG_CRYPTO_DEV_SP_PSP - .psp_vdata = &pspv2, + .psp_vdata = &pspv4, #endif }, { /* 6 */ diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c index 35794c7271fb614d4179d89aad6d362d25d2cf2a..109ffb375fc69510be8b7339fb73fda00e1515cc 100644 --- a/drivers/crypto/ccree/cc_aead.c +++ b/drivers/crypto/ccree/cc_aead.c @@ -138,7 +138,7 @@ static int cc_aead_init(struct crypto_aead *tfm) ctx->flow_mode = cc_alg->flow_mode; ctx->auth_mode = cc_alg->auth_mode; ctx->drvdata = cc_alg->drvdata; - crypto_aead_set_reqsize(tfm, sizeof(struct aead_req_ctx)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct aead_req_ctx)); /* Allocate key buffer, cache line aligned */ ctx->enckey = dma_alloc_coherent(dev, AES_MAX_KEY_SIZE, @@ -208,7 +208,7 @@ init_failed: static void cc_aead_complete(struct device *dev, void *cc_req, int err) { struct aead_request *areq = (struct aead_request *)cc_req; - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); struct crypto_aead *tfm = crypto_aead_reqtfm(cc_req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -723,7 +723,7 @@ static void cc_set_assoc_desc(struct aead_request *areq, unsigned int flow_mode, { struct crypto_aead *tfm = crypto_aead_reqtfm(areq); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); enum cc_req_dma_buf_type assoc_dma_type = areq_ctx->assoc_buff_type; unsigned int idx = *seq_size; struct device *dev = drvdata_to_dev(ctx->drvdata); @@ -762,7 +762,7 @@ static void cc_proc_authen_desc(struct aead_request *areq, struct cc_hw_desc desc[], unsigned int *seq_size, int direct) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); enum cc_req_dma_buf_type data_dma_type = areq_ctx->data_buff_type; unsigned int idx = *seq_size; struct crypto_aead *tfm = crypto_aead_reqtfm(areq); @@ -827,7 +827,7 @@ static void cc_proc_cipher_desc(struct aead_request *areq, unsigned int *seq_size) { unsigned int idx = *seq_size; - struct aead_req_ctx *areq_ctx = aead_request_ctx(areq); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq); enum cc_req_dma_buf_type data_dma_type = areq_ctx->data_buff_type; struct crypto_aead *tfm = crypto_aead_reqtfm(areq); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); @@ -873,7 +873,7 @@ static void cc_proc_digest_desc(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; unsigned int hash_mode = (ctx->auth_mode == DRV_HASH_SHA1) ? DRV_HASH_HW_SHA1 : DRV_HASH_HW_SHA256; @@ -923,7 +923,7 @@ static void cc_set_cipher_desc(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int hw_iv_size = req_ctx->hw_iv_size; unsigned int idx = *seq_size; int direct = req_ctx->gen_ctx.op_type; @@ -965,7 +965,7 @@ static void cc_set_cipher_desc(struct aead_request *req, static void cc_proc_cipher(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size, unsigned int data_flow_mode) { - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); int direct = req_ctx->gen_ctx.op_type; unsigned int idx = *seq_size; @@ -1082,7 +1082,7 @@ static void cc_proc_header_desc(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; /* Hash associated data */ @@ -1158,7 +1158,7 @@ static void cc_proc_scheme_desc(struct aead_request *req, static void cc_mlli_to_sram(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size) { - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); @@ -1212,7 +1212,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); int direct = req_ctx->gen_ctx.op_type; unsigned int data_flow_mode = cc_get_data_flow(direct, ctx->flow_mode, @@ -1265,7 +1265,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); int direct = req_ctx->gen_ctx.op_type; unsigned int data_flow_mode = cc_get_data_flow(direct, ctx->flow_mode, @@ -1312,7 +1312,7 @@ static int validate_data_size(struct cc_aead_ctx *ctx, enum drv_crypto_direction direct, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(ctx->drvdata); unsigned int assoclen = areq_ctx->assoclen; unsigned int cipherlen = (direct == DRV_CRYPTO_DIRECTION_DECRYPT) ? @@ -1411,7 +1411,7 @@ static int cc_ccm(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; unsigned int cipher_flow_mode; dma_addr_t mac_result; @@ -1533,7 +1533,7 @@ static int config_ccm_adata(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); //unsigned int size_of_a = 0, rem_a_size = 0; unsigned int lp = req->iv[0]; /* Note: The code assume that req->iv[0] already contains the value @@ -1591,7 +1591,7 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); /* L' */ memset(areq_ctx->ctr_iv, 0, AES_BLOCK_SIZE); @@ -1615,7 +1615,7 @@ static void cc_set_ghash_desc(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; /* load key to AES*/ @@ -1693,7 +1693,7 @@ static void cc_set_gctr_desc(struct aead_request *req, struct cc_hw_desc desc[], { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int idx = *seq_size; /* load key to AES*/ @@ -1730,7 +1730,7 @@ static void cc_proc_gcm_result(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); dma_addr_t mac_result; unsigned int idx = *seq_size; @@ -1792,7 +1792,7 @@ static void cc_proc_gcm_result(struct aead_request *req, static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[], unsigned int *seq_size) { - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); unsigned int cipher_flow_mode; //in RFC4543 no data to encrypt. just copy data from src to dest. @@ -1830,7 +1830,7 @@ static int config_gcm_context(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *req_ctx = aead_request_ctx(req); + struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(ctx->drvdata); unsigned int cryptlen = (req_ctx->gen_ctx.op_type == @@ -1879,7 +1879,7 @@ static void cc_proc_rfc4_gcm(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_NONCE_OFFSET, ctx->ctr_nonce, GCM_BLOCK_RFC4_NONCE_SIZE); @@ -1896,7 +1896,7 @@ static int cc_proc_aead(struct aead_request *req, struct cc_hw_desc desc[MAX_AEAD_PROCESS_SEQ]; struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm); - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(ctx->drvdata); struct cc_crypto_req cc_req = {}; @@ -2019,7 +2019,7 @@ exit: static int cc_aead_encrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2039,7 +2039,7 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req) { /* Very similar to cc_aead_encrypt() above. */ - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2063,7 +2063,7 @@ out: static int cc_aead_decrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; memset(areq_ctx, 0, sizeof(*areq_ctx)); @@ -2081,7 +2081,7 @@ static int cc_aead_decrypt(struct aead_request *req) static int cc_rfc4309_ccm_decrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2193,7 +2193,7 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc, static int cc_rfc4106_gcm_encrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2217,7 +2217,7 @@ out: static int cc_rfc4543_gcm_encrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2244,7 +2244,7 @@ out: static int cc_rfc4106_gcm_decrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); @@ -2268,7 +2268,7 @@ out: static int cc_rfc4543_gcm_decrypt(struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc; rc = crypto_ipsec_check_assoclen(req->assoclen); diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c index 9efd88f871d1232fdc5708c312de012347175152..bcca55bff910ef04832969d79efc49952d04d8c6 100644 --- a/drivers/crypto/ccree/cc_buffer_mgr.c +++ b/drivers/crypto/ccree/cc_buffer_mgr.c @@ -52,7 +52,7 @@ static inline char *cc_dma_buf_type(enum cc_req_dma_buf_type type) static void cc_copy_mac(struct device *dev, struct aead_request *req, enum cc_sg_cpy_direct dir) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); u32 skip = req->assoclen + req->cryptlen; cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src, @@ -456,7 +456,7 @@ cipher_exit: void cc_unmap_aead_request(struct device *dev, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct cc_drvdata *drvdata = dev_get_drvdata(dev); int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL); @@ -546,7 +546,7 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata, struct buffer_array *sg_data, bool is_last, bool do_chain) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); unsigned int hw_iv_size = areq_ctx->hw_iv_size; struct device *dev = drvdata_to_dev(drvdata); gfp_t flags = cc_gfp_flags(&req->base); @@ -586,7 +586,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata, struct buffer_array *sg_data, bool is_last, bool do_chain) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); int rc = 0; int mapped_nents = 0; struct device *dev = drvdata_to_dev(drvdata); @@ -652,7 +652,7 @@ chain_assoc_exit: static void cc_prepare_aead_data_dlli(struct aead_request *req, u32 *src_last_bytes, u32 *dst_last_bytes) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type; unsigned int authsize = areq_ctx->req_authsize; struct scatterlist *sg; @@ -678,7 +678,7 @@ static void cc_prepare_aead_data_mlli(struct cc_drvdata *drvdata, u32 *src_last_bytes, u32 *dst_last_bytes, bool is_last_table) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type; unsigned int authsize = areq_ctx->req_authsize; struct device *dev = drvdata_to_dev(drvdata); @@ -790,7 +790,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata, struct buffer_array *sg_data, bool is_last_table, bool do_chain) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct device *dev = drvdata_to_dev(drvdata); enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type; unsigned int authsize = areq_ctx->req_authsize; @@ -895,7 +895,7 @@ chain_data_exit: static void cc_update_aead_mlli_nents(struct cc_drvdata *drvdata, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); u32 curr_mlli_size = 0; if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI) { @@ -945,7 +945,7 @@ static void cc_update_aead_mlli_nents(struct cc_drvdata *drvdata, int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req) { - struct aead_req_ctx *areq_ctx = aead_request_ctx(req); + struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req); struct mlli_params *mlli_params = &areq_ctx->mlli_params; struct device *dev = drvdata_to_dev(drvdata); struct buffer_array sg_data; diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c index 7083767602fcf98ad36051aa4279792aa9a0426c..8f008f024f8f1bee62b53339d1aa3327741940f7 100644 --- a/drivers/crypto/ccree/cc_debugfs.c +++ b/drivers/crypto/ccree/cc_debugfs.c @@ -55,7 +55,7 @@ void __init cc_debugfs_global_init(void) cc_debugfs_dir = debugfs_create_dir("ccree", NULL); } -void __exit cc_debugfs_global_fini(void) +void cc_debugfs_global_fini(void) { debugfs_remove(cc_debugfs_dir); } diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c index cadead18b59e8d6cf0862b49b074ab32cafcbc9f..d489c6f808925796655d5838aa010d7000dbbdaa 100644 --- a/drivers/crypto/ccree/cc_driver.c +++ b/drivers/crypto/ccree/cc_driver.c @@ -651,9 +651,17 @@ static struct platform_driver ccree_driver = { static int __init ccree_init(void) { + int rc; + cc_debugfs_global_init(); - return platform_driver_register(&ccree_driver); + rc = platform_driver_register(&ccree_driver); + if (rc) { + cc_debugfs_global_fini(); + return rc; + } + + return 0; } module_init(ccree_init); diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c index 683c9a430e11be1d18503a7a8a7ab8ef17234d1d..f418162932fe394dce84ee9673204cfb58f24a2c 100644 --- a/drivers/crypto/ccree/cc_hash.c +++ b/drivers/crypto/ccree/cc_hash.c @@ -283,9 +283,9 @@ static void cc_unmap_result(struct device *dev, struct ahash_req_ctx *state, static void cc_update_complete(struct device *dev, void *cc_req, int err) { struct ahash_request *req = (struct ahash_request *)cc_req; - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); dev_dbg(dev, "req=%pK\n", req); @@ -301,9 +301,9 @@ static void cc_update_complete(struct device *dev, void *cc_req, int err) static void cc_digest_complete(struct device *dev, void *cc_req, int err) { struct ahash_request *req = (struct ahash_request *)cc_req; - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); dev_dbg(dev, "req=%pK\n", req); @@ -321,9 +321,9 @@ static void cc_digest_complete(struct device *dev, void *cc_req, int err) static void cc_hash_complete(struct device *dev, void *cc_req, int err) { struct ahash_request *req = (struct ahash_request *)cc_req; - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); dev_dbg(dev, "req=%pK\n", req); @@ -341,9 +341,9 @@ static void cc_hash_complete(struct device *dev, void *cc_req, int err) static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, int idx) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); /* Get final MAC result */ @@ -364,9 +364,9 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req, static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req, int idx) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); /* store the hash digest result in the context */ @@ -417,9 +417,9 @@ static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req, static int cc_hash_digest(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); struct scatterlist *src = req->src; unsigned int nbytes = req->nbytes; @@ -555,9 +555,9 @@ static int cc_restore_hash(struct cc_hw_desc *desc, struct cc_hash_ctx *ctx, static int cc_hash_update(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base); struct scatterlist *src = req->src; unsigned int nbytes = req->nbytes; @@ -631,9 +631,9 @@ static int cc_hash_update(struct ahash_request *req) static int cc_do_finup(struct ahash_request *req, bool update) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); u32 digestsize = crypto_ahash_digestsize(tfm); struct scatterlist *src = req->src; unsigned int nbytes = req->nbytes; @@ -711,9 +711,9 @@ static int cc_hash_final(struct ahash_request *req) static int cc_hash_init(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "===== init (%d) ====\n", req->nbytes); @@ -736,7 +736,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key, u32 larval_addr; struct device *dev; - ctx = crypto_ahash_ctx(ahash); + ctx = crypto_ahash_ctx_dma(ahash); dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "start keylen: %d", keylen); @@ -922,7 +922,7 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { struct cc_crypto_req cc_req = {}; - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *dev = drvdata_to_dev(ctx->drvdata); int rc = 0; unsigned int idx = 0; @@ -1007,7 +1007,7 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash, static int cc_cmac_setkey(struct crypto_ahash *ahash, const u8 *key, unsigned int keylen) { - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "===== setkey (%d) ====\n", keylen); @@ -1109,7 +1109,7 @@ fail: static int cc_get_hash_len(struct crypto_tfm *tfm) { - struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); if (ctx->hash_mode == DRV_HASH_SM3) return CC_SM3_HASH_LEN_SIZE; @@ -1119,7 +1119,7 @@ static int cc_get_hash_len(struct crypto_tfm *tfm) static int cc_cra_init(struct crypto_tfm *tfm) { - struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct hash_alg_common *hash_alg_common = container_of(tfm->__crt_alg, struct hash_alg_common, base); struct ahash_alg *ahash_alg = @@ -1127,8 +1127,8 @@ static int cc_cra_init(struct crypto_tfm *tfm) struct cc_hash_alg *cc_alg = container_of(ahash_alg, struct cc_hash_alg, ahash_alg); - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ahash_req_ctx)); + crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm), + sizeof(struct ahash_req_ctx)); ctx->hash_mode = cc_alg->hash_mode; ctx->hw_mode = cc_alg->hw_mode; @@ -1140,7 +1140,7 @@ static int cc_cra_init(struct crypto_tfm *tfm) static void cc_cra_exit(struct crypto_tfm *tfm) { - struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); dev_dbg(dev, "cc_cra_exit"); @@ -1149,9 +1149,9 @@ static void cc_cra_exit(struct crypto_tfm *tfm) static int cc_mac_update(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base); struct cc_crypto_req cc_req = {}; @@ -1217,9 +1217,9 @@ static int cc_mac_update(struct ahash_request *req) static int cc_mac_final(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; @@ -1338,9 +1338,9 @@ static int cc_mac_final(struct ahash_request *req) static int cc_mac_finup(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); struct cc_crypto_req cc_req = {}; struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN]; @@ -1419,9 +1419,9 @@ static int cc_mac_finup(struct ahash_request *req) static int cc_mac_digest(struct ahash_request *req) { - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); struct device *dev = drvdata_to_dev(ctx->drvdata); u32 digestsize = crypto_ahash_digestsize(tfm); struct cc_crypto_req cc_req = {}; @@ -1499,8 +1499,8 @@ static int cc_mac_digest(struct ahash_request *req) static int cc_hash_export(struct ahash_request *req, void *out) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); u8 *curr_buff = cc_hash_buf(state); u32 curr_buff_cnt = *cc_hash_buf_cnt(state); const u32 tmp = CC_EXPORT_MAGIC; @@ -1525,9 +1525,9 @@ static int cc_hash_export(struct ahash_request *req, void *out) static int cc_hash_import(struct ahash_request *req, const void *in) { struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash); struct device *dev = drvdata_to_dev(ctx->drvdata); - struct ahash_req_ctx *state = ahash_request_ctx(req); + struct ahash_req_ctx *state = ahash_request_ctx_dma(req); u32 tmp; memcpy(&tmp, in, sizeof(u32)); @@ -1846,7 +1846,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template, template->driver_name); } alg->cra_module = THIS_MODULE; - alg->cra_ctxsize = sizeof(struct cc_hash_ctx); + alg->cra_ctxsize = sizeof(struct cc_hash_ctx) + crypto_dma_padding(); alg->cra_priority = CC_CRA_PRIO; alg->cra_blocksize = template->blocksize; alg->cra_alignmask = 0; @@ -2073,9 +2073,9 @@ static void cc_setup_xcbc(struct ahash_request *areq, struct cc_hw_desc desc[], unsigned int *seq_size) { unsigned int idx = *seq_size; - struct ahash_req_ctx *state = ahash_request_ctx(areq); + struct ahash_req_ctx *state = ahash_request_ctx_dma(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); /* Setup XCBC MAC K1 */ hw_desc_init(&desc[idx]); @@ -2130,9 +2130,9 @@ static void cc_setup_cmac(struct ahash_request *areq, struct cc_hw_desc desc[], unsigned int *seq_size) { unsigned int idx = *seq_size; - struct ahash_req_ctx *state = ahash_request_ctx(areq); + struct ahash_req_ctx *state = ahash_request_ctx_dma(areq); struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); - struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm); + struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm); /* Setup CMAC Key */ hw_desc_init(&desc[idx]); diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig index f886401af13e7b271ccb94d62aa8c4a117ac0552..5dd3f6a4781a2c963c0bc1023686f643d933712b 100644 --- a/drivers/crypto/chelsio/Kconfig +++ b/drivers/crypto/chelsio/Kconfig @@ -3,11 +3,11 @@ config CRYPTO_DEV_CHELSIO tristate "Chelsio Crypto Co-processor Driver" depends on CHELSIO_T4 select CRYPTO_LIB_AES + select CRYPTO_LIB_GF128MUL select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 select CRYPTO_AUTHENC - select CRYPTO_GF128MUL help The Chelsio Crypto Co-processor driver for T6 adapters. diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c index 6933546f87b1a863e9eecc94470d09168bde5b51..68d65773ef2bdc8f0b24cf028c950201ab949e98 100644 --- a/drivers/crypto/chelsio/chcr_algo.c +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -98,17 +98,17 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req, static inline struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx) { - return ctx->crypto_ctx->aeadctx; + return &ctx->crypto_ctx->aeadctx; } static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx) { - return ctx->crypto_ctx->ablkctx; + return &ctx->crypto_ctx->ablkctx; } static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx) { - return ctx->crypto_ctx->hmacctx; + return &ctx->crypto_ctx->hmacctx; } static inline struct chcr_gcm_ctx *GCM_CTX(struct chcr_aead_ctx *gctx) @@ -210,7 +210,7 @@ static inline int chcr_handle_aead_resp(struct aead_request *req, unsigned char *input, int err) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_dev *dev = a_ctx(tfm)->dev; @@ -718,7 +718,7 @@ static inline int get_qidxs(struct crypto_async_request *req, { struct aead_request *aead_req = container_of(req, struct aead_request, base); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(aead_req); *txqidx = reqctx->txqidx; *rxqidx = reqctx->rxqidx; break; @@ -2362,7 +2362,7 @@ static void chcr_hmac_cra_exit(struct crypto_tfm *tfm) inline void chcr_aead_common_exit(struct aead_request *req) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm)); @@ -2373,7 +2373,7 @@ static int chcr_aead_common_init(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); unsigned int authsize = crypto_aead_authsize(tfm); int error = -EINVAL; @@ -2417,7 +2417,7 @@ static int chcr_aead_fallback(struct aead_request *req, unsigned short op_type) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); - struct aead_request *subreq = aead_request_ctx(req); + struct aead_request *subreq = aead_request_ctx_dma(req); aead_request_set_tfm(subreq, aeadctx->sw_cipher); aead_request_set_callback(subreq, req->base.flags, @@ -2438,7 +2438,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req, struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -2576,7 +2576,7 @@ int chcr_aead_dma_map(struct device *dev, unsigned short op_type) { int error; - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(tfm); int src_len, dst_len; @@ -2637,7 +2637,7 @@ void chcr_aead_dma_unmap(struct device *dev, struct aead_request *req, unsigned short op_type) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); unsigned int authsize = crypto_aead_authsize(tfm); int src_len, dst_len; @@ -2678,7 +2678,7 @@ void chcr_add_aead_src_ent(struct aead_request *req, struct ulptx_sgl *ulptx) { struct ulptx_walk ulp_walk; - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); if (reqctx->imm) { u8 *buf = (u8 *)ulptx; @@ -2704,7 +2704,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req, struct cpl_rx_phys_dsgl *phys_cpl, unsigned short qid) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct dsgl_walk dsgl_walk; unsigned int authsize = crypto_aead_authsize(tfm); @@ -2894,7 +2894,7 @@ static int generate_b0(struct aead_request *req, u8 *ivptr, unsigned int l, lp, m; int rc; struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); u8 *b0 = reqctx->scratch_pad; m = crypto_aead_authsize(aead); @@ -2932,7 +2932,7 @@ static int ccm_format_packet(struct aead_request *req, unsigned short op_type, unsigned int assoclen) { - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); int rc = 0; @@ -2963,7 +2963,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl, struct chcr_context *ctx = a_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM; unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC; unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan; @@ -3036,7 +3036,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req, { struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm)); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -3135,7 +3135,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req, struct chcr_context *ctx = a_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct sk_buff *skb = NULL; struct chcr_wr *chcr_req; struct cpl_rx_phys_dsgl *phys_cpl; @@ -3255,9 +3255,10 @@ static int chcr_aead_cra_init(struct crypto_aead *tfm) CRYPTO_ALG_ASYNC); if (IS_ERR(aeadctx->sw_cipher)) return PTR_ERR(aeadctx->sw_cipher); - crypto_aead_set_reqsize(tfm, max(sizeof(struct chcr_aead_reqctx), - sizeof(struct aead_request) + - crypto_aead_reqsize(aeadctx->sw_cipher))); + crypto_aead_set_reqsize_dma( + tfm, max(sizeof(struct chcr_aead_reqctx), + sizeof(struct aead_request) + + crypto_aead_reqsize(aeadctx->sw_cipher))); return chcr_device_init(a_ctx(tfm)); } @@ -3735,7 +3736,7 @@ static int chcr_aead_op(struct aead_request *req, create_wr_t create_wr_fn) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct chcr_context *ctx = a_ctx(tfm); struct uld_ctx *u_ctx = ULD_CTX(ctx); struct sk_buff *skb; @@ -3785,7 +3786,7 @@ static int chcr_aead_op(struct aead_request *req, static int chcr_aead_encrypt(struct aead_request *req) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); struct chcr_context *ctx = a_ctx(tfm); unsigned int cpu; @@ -3816,7 +3817,7 @@ static int chcr_aead_decrypt(struct aead_request *req) struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct chcr_context *ctx = a_ctx(tfm); struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx); - struct chcr_aead_reqctx *reqctx = aead_request_ctx(req); + struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req); int size; unsigned int cpu; diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h index c7816c83e32461dd44f028a8904c5a982ca273e8..7f88ddb086313e5016936c960ade224ab8e14535 100644 --- a/drivers/crypto/chelsio/chcr_crypto.h +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -248,9 +248,9 @@ struct hmac_ctx { struct __crypto_ctx { union { - DECLARE_FLEX_ARRAY(struct hmac_ctx, hmacctx); - DECLARE_FLEX_ARRAY(struct ablk_ctx, ablkctx); - DECLARE_FLEX_ARRAY(struct chcr_aead_ctx, aeadctx); + struct hmac_ctx hmacctx; + struct ablk_ctx ablkctx; + struct chcr_aead_ctx aeadctx; }; }; diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig index 27e1fa91206390d62f4b65a52d75980f5001b279..743ce4fc3158c68b46a6b0ded41b8f21b9345dcb 100644 --- a/drivers/crypto/hisilicon/Kconfig +++ b/drivers/crypto/hisilicon/Kconfig @@ -26,7 +26,7 @@ config CRYPTO_DEV_HISI_SEC2 select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 - select CRYPTO_SM4 + select CRYPTO_SM4_GENERIC depends on PCI && PCI_MSI depends on UACCE || UACCE=n depends on ARM64 || (COMPILE_TEST && 64BIT) diff --git a/drivers/crypto/hisilicon/Makefile b/drivers/crypto/hisilicon/Makefile index 1e89269a2e4b01192af6953ac12c9018a487d4a3..8595a5a5d22888d2e9e3d090f170057971f77fe8 100644 --- a/drivers/crypto/hisilicon/Makefile +++ b/drivers/crypto/hisilicon/Makefile @@ -3,6 +3,6 @@ obj-$(CONFIG_CRYPTO_DEV_HISI_HPRE) += hpre/ obj-$(CONFIG_CRYPTO_DEV_HISI_SEC) += sec/ obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += sec2/ obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += hisi_qm.o -hisi_qm-objs = qm.o sgl.o +hisi_qm-objs = qm.o sgl.o debugfs.o obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/ obj-$(CONFIG_CRYPTO_DEV_HISI_TRNG) += trng/ diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c new file mode 100644 index 0000000000000000000000000000000000000000..2cc1591949db7e078846115e5bd646e6afe2e37f --- /dev/null +++ b/drivers/crypto/hisilicon/debugfs.c @@ -0,0 +1,1147 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022 HiSilicon Limited. */ +#include +#include "qm_common.h" + +#define QM_DFX_BASE 0x0100000 +#define QM_DFX_STATE1 0x0104000 +#define QM_DFX_STATE2 0x01040C8 +#define QM_DFX_COMMON 0x0000 +#define QM_DFX_BASE_LEN 0x5A +#define QM_DFX_STATE1_LEN 0x2E +#define QM_DFX_STATE2_LEN 0x11 +#define QM_DFX_COMMON_LEN 0xC3 +#define QM_DFX_REGS_LEN 4UL +#define QM_DBG_TMP_BUF_LEN 22 +#define CURRENT_FUN_MASK GENMASK(5, 0) +#define CURRENT_Q_MASK GENMASK(31, 16) +#define QM_SQE_ADDR_MASK GENMASK(7, 0) + +#define QM_DFX_MB_CNT_VF 0x104010 +#define QM_DFX_DB_CNT_VF 0x104020 +#define QM_DFX_SQE_CNT_VF_SQN 0x104030 +#define QM_DFX_CQE_CNT_VF_CQN 0x104040 +#define QM_DFX_QN_SHIFT 16 +#define QM_DFX_CNT_CLR_CE 0x100118 +#define QM_DBG_WRITE_LEN 1024 + +static const char * const qm_debug_file_name[] = { + [CURRENT_QM] = "current_qm", + [CURRENT_Q] = "current_q", + [CLEAR_ENABLE] = "clear_enable", +}; + +struct qm_dfx_item { + const char *name; + u32 offset; +}; + +struct qm_cmd_dump_item { + const char *cmd; + char *info_name; + int (*dump_fn)(struct hisi_qm *qm, char *cmd, char *info_name); +}; + +static struct qm_dfx_item qm_dfx_files[] = { + {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)}, + {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)}, + {"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)}, + {"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)}, + {"mb_err", offsetof(struct qm_dfx, mb_err_cnt)}, +}; + +#define CNT_CYC_REGS_NUM 10 +static const struct debugfs_reg32 qm_dfx_regs[] = { + /* XXX_CNT are reading clear register */ + {"QM_ECC_1BIT_CNT ", 0x104000ull}, + {"QM_ECC_MBIT_CNT ", 0x104008ull}, + {"QM_DFX_MB_CNT ", 0x104018ull}, + {"QM_DFX_DB_CNT ", 0x104028ull}, + {"QM_DFX_SQE_CNT ", 0x104038ull}, + {"QM_DFX_CQE_CNT ", 0x104048ull}, + {"QM_DFX_SEND_SQE_TO_ACC_CNT ", 0x104050ull}, + {"QM_DFX_WB_SQE_FROM_ACC_CNT ", 0x104058ull}, + {"QM_DFX_ACC_FINISH_CNT ", 0x104060ull}, + {"QM_DFX_CQE_ERR_CNT ", 0x1040b4ull}, + {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, + {"QM_ECC_1BIT_INF ", 0x104004ull}, + {"QM_ECC_MBIT_INF ", 0x10400cull}, + {"QM_DFX_ACC_RDY_VLD0 ", 0x1040a0ull}, + {"QM_DFX_ACC_RDY_VLD1 ", 0x1040a4ull}, + {"QM_DFX_AXI_RDY_VLD ", 0x1040a8ull}, + {"QM_DFX_FF_ST0 ", 0x1040c8ull}, + {"QM_DFX_FF_ST1 ", 0x1040ccull}, + {"QM_DFX_FF_ST2 ", 0x1040d0ull}, + {"QM_DFX_FF_ST3 ", 0x1040d4ull}, + {"QM_DFX_FF_ST4 ", 0x1040d8ull}, + {"QM_DFX_FF_ST5 ", 0x1040dcull}, + {"QM_DFX_FF_ST6 ", 0x1040e0ull}, + {"QM_IN_IDLE_ST ", 0x1040e4ull}, +}; + +static const struct debugfs_reg32 qm_vf_dfx_regs[] = { + {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, +}; + +/* define the QM's dfx regs region and region length */ +static struct dfx_diff_registers qm_diff_regs[] = { + { + .reg_offset = QM_DFX_BASE, + .reg_len = QM_DFX_BASE_LEN, + }, { + .reg_offset = QM_DFX_STATE1, + .reg_len = QM_DFX_STATE1_LEN, + }, { + .reg_offset = QM_DFX_STATE2, + .reg_len = QM_DFX_STATE2_LEN, + }, { + .reg_offset = QM_DFX_COMMON, + .reg_len = QM_DFX_COMMON_LEN, + }, +}; + +static struct hisi_qm *file_to_qm(struct debugfs_file *file) +{ + struct qm_debug *debug = file->debug; + + return container_of(debug, struct hisi_qm, debug); +} + +static ssize_t qm_cmd_read(struct file *filp, char __user *buffer, + size_t count, loff_t *pos) +{ + char buf[QM_DBG_READ_LEN]; + int len; + + len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", + "Please echo help to cmd to get help information"); + + return simple_read_from_buffer(buffer, count, pos, buf, len); +} + +static void dump_show(struct hisi_qm *qm, void *info, + unsigned int info_size, char *info_name) +{ + struct device *dev = &qm->pdev->dev; + u8 *info_curr = info; + u32 i; +#define BYTE_PER_DW 4 + + dev_info(dev, "%s DUMP\n", info_name); + for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) { + pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW, + *(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr)); + } +} + +static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name) +{ + struct device *dev = &qm->pdev->dev; + struct qm_sqc *sqc, *sqc_curr; + dma_addr_t sqc_dma; + u32 qp_id; + int ret; + + if (!s) + return -EINVAL; + + ret = kstrtou32(s, 0, &qp_id); + if (ret || qp_id >= qm->qp_num) { + dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); + return -EINVAL; + } + + sqc = hisi_qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma); + if (IS_ERR(sqc)) + return PTR_ERR(sqc); + + ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 1); + if (ret) { + down_read(&qm->qps_lock); + if (qm->sqc) { + sqc_curr = qm->sqc + qp_id; + + dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC"); + } + up_read(&qm->qps_lock); + + goto free_ctx; + } + + dump_show(qm, sqc, sizeof(*sqc), name); + +free_ctx: + hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma); + return 0; +} + +static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name) +{ + struct device *dev = &qm->pdev->dev; + struct qm_cqc *cqc, *cqc_curr; + dma_addr_t cqc_dma; + u32 qp_id; + int ret; + + if (!s) + return -EINVAL; + + ret = kstrtou32(s, 0, &qp_id); + if (ret || qp_id >= qm->qp_num) { + dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); + return -EINVAL; + } + + cqc = hisi_qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma); + if (IS_ERR(cqc)) + return PTR_ERR(cqc); + + ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 1); + if (ret) { + down_read(&qm->qps_lock); + if (qm->cqc) { + cqc_curr = qm->cqc + qp_id; + + dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC"); + } + up_read(&qm->qps_lock); + + goto free_ctx; + } + + dump_show(qm, cqc, sizeof(*cqc), name); + +free_ctx: + hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma); + return 0; +} + +static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name) +{ + struct device *dev = &qm->pdev->dev; + dma_addr_t xeqc_dma; + size_t size; + void *xeqc; + int ret; + u8 cmd; + + if (strsep(&s, " ")) { + dev_err(dev, "Please do not input extra characters!\n"); + return -EINVAL; + } + + if (!strcmp(name, "EQC")) { + cmd = QM_MB_CMD_EQC; + size = sizeof(struct qm_eqc); + } else { + cmd = QM_MB_CMD_AEQC; + size = sizeof(struct qm_aeqc); + } + + xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma); + if (IS_ERR(xeqc)) + return PTR_ERR(xeqc); + + ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1); + if (ret) + goto err_free_ctx; + + dump_show(qm, xeqc, size, name); + +err_free_ctx: + hisi_qm_ctx_free(qm, size, xeqc, &xeqc_dma); + return ret; +} + +static int q_dump_param_parse(struct hisi_qm *qm, char *s, + u32 *e_id, u32 *q_id, u16 q_depth) +{ + struct device *dev = &qm->pdev->dev; + unsigned int qp_num = qm->qp_num; + char *presult; + int ret; + + presult = strsep(&s, " "); + if (!presult) { + dev_err(dev, "Please input qp number!\n"); + return -EINVAL; + } + + ret = kstrtou32(presult, 0, q_id); + if (ret || *q_id >= qp_num) { + dev_err(dev, "Please input qp num (0-%u)", qp_num - 1); + return -EINVAL; + } + + presult = strsep(&s, " "); + if (!presult) { + dev_err(dev, "Please input sqe number!\n"); + return -EINVAL; + } + + ret = kstrtou32(presult, 0, e_id); + if (ret || *e_id >= q_depth) { + dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1); + return -EINVAL; + } + + if (strsep(&s, " ")) { + dev_err(dev, "Please do not input extra characters!\n"); + return -EINVAL; + } + + return 0; +} + +static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name) +{ + u16 sq_depth = qm->qp_array->cq_depth; + void *sqe, *sqe_curr; + struct hisi_qp *qp; + u32 qp_id, sqe_id; + int ret; + + ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth); + if (ret) + return ret; + + sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL); + if (!sqe) + return -ENOMEM; + + qp = &qm->qp_array[qp_id]; + memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth); + sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size); + memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, + qm->debug.sqe_mask_len); + + dump_show(qm, sqe_curr, qm->sqe_size, name); + + kfree(sqe); + + return 0; +} + +static int qm_cq_dump(struct hisi_qm *qm, char *s, char *name) +{ + struct qm_cqe *cqe_curr; + struct hisi_qp *qp; + u32 qp_id, cqe_id; + int ret; + + ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth); + if (ret) + return ret; + + qp = &qm->qp_array[qp_id]; + cqe_curr = qp->cqe + cqe_id; + dump_show(qm, cqe_curr, sizeof(struct qm_cqe), name); + + return 0; +} + +static int qm_eq_aeq_dump(struct hisi_qm *qm, char *s, char *name) +{ + struct device *dev = &qm->pdev->dev; + u16 xeq_depth; + size_t size; + void *xeqe; + u32 xeqe_id; + int ret; + + if (!s) + return -EINVAL; + + ret = kstrtou32(s, 0, &xeqe_id); + if (ret) + return -EINVAL; + + if (!strcmp(name, "EQE")) { + xeq_depth = qm->eq_depth; + size = sizeof(struct qm_eqe); + } else { + xeq_depth = qm->aeq_depth; + size = sizeof(struct qm_aeqe); + } + + if (xeqe_id >= xeq_depth) { + dev_err(dev, "Please input eqe or aeqe num (0-%u)", xeq_depth - 1); + return -EINVAL; + } + + down_read(&qm->qps_lock); + + if (qm->eqe && !strcmp(name, "EQE")) { + xeqe = qm->eqe + xeqe_id; + } else if (qm->aeqe && !strcmp(name, "AEQE")) { + xeqe = qm->aeqe + xeqe_id; + } else { + ret = -EINVAL; + goto err_unlock; + } + + dump_show(qm, xeqe, size, name); + +err_unlock: + up_read(&qm->qps_lock); + return ret; +} + +static int qm_dbg_help(struct hisi_qm *qm, char *s) +{ + struct device *dev = &qm->pdev->dev; + + if (strsep(&s, " ")) { + dev_err(dev, "Please do not input extra characters!\n"); + return -EINVAL; + } + + dev_info(dev, "available commands:\n"); + dev_info(dev, "sqc \n"); + dev_info(dev, "cqc \n"); + dev_info(dev, "eqc\n"); + dev_info(dev, "aeqc\n"); + dev_info(dev, "sq \n"); + dev_info(dev, "cq \n"); + dev_info(dev, "eq \n"); + dev_info(dev, "aeq \n"); + + return 0; +} + +static const struct qm_cmd_dump_item qm_cmd_dump_table[] = { + { + .cmd = "sqc", + .info_name = "SQC", + .dump_fn = qm_sqc_dump, + }, { + .cmd = "cqc", + .info_name = "CQC", + .dump_fn = qm_cqc_dump, + }, { + .cmd = "eqc", + .info_name = "EQC", + .dump_fn = qm_eqc_aeqc_dump, + }, { + .cmd = "aeqc", + .info_name = "AEQC", + .dump_fn = qm_eqc_aeqc_dump, + }, { + .cmd = "sq", + .info_name = "SQE", + .dump_fn = qm_sq_dump, + }, { + .cmd = "cq", + .info_name = "CQE", + .dump_fn = qm_cq_dump, + }, { + .cmd = "eq", + .info_name = "EQE", + .dump_fn = qm_eq_aeq_dump, + }, { + .cmd = "aeq", + .info_name = "AEQE", + .dump_fn = qm_eq_aeq_dump, + }, +}; + +static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf) +{ + struct device *dev = &qm->pdev->dev; + char *presult, *s, *s_tmp; + int table_size, i, ret; + + s = kstrdup(cmd_buf, GFP_KERNEL); + if (!s) + return -ENOMEM; + + s_tmp = s; + presult = strsep(&s, " "); + if (!presult) { + ret = -EINVAL; + goto err_buffer_free; + } + + if (!strcmp(presult, "help")) { + ret = qm_dbg_help(qm, s); + goto err_buffer_free; + } + + table_size = ARRAY_SIZE(qm_cmd_dump_table); + for (i = 0; i < table_size; i++) { + if (!strcmp(presult, qm_cmd_dump_table[i].cmd)) { + ret = qm_cmd_dump_table[i].dump_fn(qm, s, + qm_cmd_dump_table[i].info_name); + break; + } + } + + if (i == table_size) { + dev_info(dev, "Please echo help\n"); + ret = -EINVAL; + } + +err_buffer_free: + kfree(s_tmp); + + return ret; +} + +static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, + size_t count, loff_t *pos) +{ + struct hisi_qm *qm = filp->private_data; + char *cmd_buf, *cmd_buf_tmp; + int ret; + + if (*pos) + return 0; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + + /* Judge if the instance is being reset. */ + if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) { + ret = 0; + goto put_dfx_access; + } + + if (count > QM_DBG_WRITE_LEN) { + ret = -ENOSPC; + goto put_dfx_access; + } + + cmd_buf = memdup_user_nul(buffer, count); + if (IS_ERR(cmd_buf)) { + ret = PTR_ERR(cmd_buf); + goto put_dfx_access; + } + + cmd_buf_tmp = strchr(cmd_buf, '\n'); + if (cmd_buf_tmp) { + *cmd_buf_tmp = '\0'; + count = cmd_buf_tmp - cmd_buf + 1; + } + + ret = qm_cmd_write_dump(qm, cmd_buf); + if (ret) { + kfree(cmd_buf); + goto put_dfx_access; + } + + kfree(cmd_buf); + + ret = count; + +put_dfx_access: + hisi_qm_put_dfx_access(qm); + return ret; +} + +static const struct file_operations qm_cmd_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = qm_cmd_read, + .write = qm_cmd_write, +}; + +/** + * hisi_qm_regs_dump() - Dump registers's value. + * @s: debugfs file handle. + * @regset: accelerator registers information. + * + * Dump accelerator registers. + */ +void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset) +{ + struct pci_dev *pdev = to_pci_dev(regset->dev); + struct hisi_qm *qm = pci_get_drvdata(pdev); + const struct debugfs_reg32 *regs = regset->regs; + int regs_len = regset->nregs; + int i, ret; + u32 val; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + for (i = 0; i < regs_len; i++) { + val = readl(regset->base + regs[i].offset); + seq_printf(s, "%s= 0x%08x\n", regs[i].name, val); + } + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_dump); + +static int qm_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + struct debugfs_regset32 regset; + + if (qm->fun_type == QM_HW_PF) { + regset.regs = qm_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_dfx_regs); + } else { + regset.regs = qm_vf_dfx_regs; + regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs); + } + + regset.base = qm->io_base; + regset.dev = &qm->pdev->dev; + + hisi_qm_regs_dump(s, ®set); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(qm_regs); + +static u32 current_q_read(struct hisi_qm *qm) +{ + return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT; +} + +static int current_q_write(struct hisi_qm *qm, u32 val) +{ + u32 tmp; + + if (val >= qm->debug.curr_qm_qp_num) + return -EINVAL; + + tmp = val << QM_DFX_QN_SHIFT | + (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK); + writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); + + tmp = val << QM_DFX_QN_SHIFT | + (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK); + writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); + + return 0; +} + +static u32 clear_enable_read(struct hisi_qm *qm) +{ + return readl(qm->io_base + QM_DFX_CNT_CLR_CE); +} + +/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */ +static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl) +{ + if (rd_clr_ctrl > 1) + return -EINVAL; + + writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE); + + return 0; +} + +static u32 current_qm_read(struct hisi_qm *qm) +{ + return readl(qm->io_base + QM_DFX_MB_CNT_VF); +} + +static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num) +{ + u32 remain_q_num, vfq_num; + u32 num_vfs = qm->vfs_num; + + vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs; + if (vfq_num >= qm->max_qp_num) + return qm->max_qp_num; + + remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs; + if (vfq_num + remain_q_num <= qm->max_qp_num) + return fun_num == num_vfs ? vfq_num + remain_q_num : vfq_num; + + /* + * if vfq_num + remain_q_num > max_qp_num, the last VFs, + * each with one more queue. + */ + return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num; +} + +static int current_qm_write(struct hisi_qm *qm, u32 val) +{ + u32 tmp; + + if (val > qm->vfs_num) + return -EINVAL; + + /* According PF or VF Dev ID to calculation curr_qm_qp_num and store */ + if (!val) + qm->debug.curr_qm_qp_num = qm->qp_num; + else + qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val); + + writel(val, qm->io_base + QM_DFX_MB_CNT_VF); + writel(val, qm->io_base + QM_DFX_DB_CNT_VF); + + tmp = val | + (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK); + writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); + + tmp = val | + (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK); + writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); + + return 0; +} + +static ssize_t qm_debug_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct debugfs_file *file = filp->private_data; + enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); + char tbuf[QM_DBG_TMP_BUF_LEN]; + u32 val; + int ret; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + + mutex_lock(&file->lock); + switch (index) { + case CURRENT_QM: + val = current_qm_read(qm); + break; + case CURRENT_Q: + val = current_q_read(qm); + break; + case CLEAR_ENABLE: + val = clear_enable_read(qm); + break; + default: + goto err_input; + } + mutex_unlock(&file->lock); + + hisi_qm_put_dfx_access(qm); + ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val); + return simple_read_from_buffer(buf, count, pos, tbuf, ret); + +err_input: + mutex_unlock(&file->lock); + hisi_qm_put_dfx_access(qm); + return -EINVAL; +} + +static ssize_t qm_debug_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct debugfs_file *file = filp->private_data; + enum qm_debug_file index = file->index; + struct hisi_qm *qm = file_to_qm(file); + unsigned long val; + char tbuf[QM_DBG_TMP_BUF_LEN]; + int len, ret; + + if (*pos != 0) + return 0; + + if (count >= QM_DBG_TMP_BUF_LEN) + return -ENOSPC; + + len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf, + count); + if (len < 0) + return len; + + tbuf[len] = '\0'; + if (kstrtoul(tbuf, 0, &val)) + return -EFAULT; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return ret; + + mutex_lock(&file->lock); + switch (index) { + case CURRENT_QM: + ret = current_qm_write(qm, val); + break; + case CURRENT_Q: + ret = current_q_write(qm, val); + break; + case CLEAR_ENABLE: + ret = clear_enable_write(qm, val); + break; + default: + ret = -EINVAL; + } + mutex_unlock(&file->lock); + + hisi_qm_put_dfx_access(qm); + + if (ret) + return ret; + + return count; +} + +static const struct file_operations qm_debug_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = qm_debug_read, + .write = qm_debug_write, +}; + +static void dfx_regs_uninit(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, int reg_len) +{ + int i; + + /* Setting the pointer is NULL to prevent double free */ + for (i = 0; i < reg_len; i++) { + kfree(dregs[i].regs); + dregs[i].regs = NULL; + } + kfree(dregs); +} + +static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, + const struct dfx_diff_registers *cregs, u32 reg_len) +{ + struct dfx_diff_registers *diff_regs; + u32 j, base_offset; + int i; + + diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL); + if (!diff_regs) + return ERR_PTR(-ENOMEM); + + for (i = 0; i < reg_len; i++) { + if (!cregs[i].reg_len) + continue; + + diff_regs[i].reg_offset = cregs[i].reg_offset; + diff_regs[i].reg_len = cregs[i].reg_len; + diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len, + GFP_KERNEL); + if (!diff_regs[i].regs) + goto alloc_error; + + for (j = 0; j < diff_regs[i].reg_len; j++) { + base_offset = diff_regs[i].reg_offset + + j * QM_DFX_REGS_LEN; + diff_regs[i].regs[j] = readl(qm->io_base + base_offset); + } + } + + return diff_regs; + +alloc_error: + while (i > 0) { + i--; + kfree(diff_regs[i].regs); + } + kfree(diff_regs); + return ERR_PTR(-ENOMEM); +} + +static int qm_diff_regs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len) +{ + qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); + if (IS_ERR(qm->debug.qm_diff_regs)) + return PTR_ERR(qm->debug.qm_diff_regs); + + qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); + if (IS_ERR(qm->debug.acc_diff_regs)) { + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); + return PTR_ERR(qm->debug.acc_diff_regs); + } + + return 0; +} + +static void qm_last_regs_uninit(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + kfree(debug->qm_last_words); + debug->qm_last_words = NULL; +} + +static int qm_last_regs_init(struct hisi_qm *qm) +{ + int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); + struct qm_debug *debug = &qm->debug; + int i; + + if (qm->fun_type == QM_HW_VF) + return 0; + + debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL); + if (!debug->qm_last_words) + return -ENOMEM; + + for (i = 0; i < dfx_regs_num; i++) { + debug->qm_last_words[i] = readl_relaxed(qm->io_base + + qm_dfx_regs[i].offset); + } + + return 0; +} + +static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len) +{ + dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); + dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); +} + +/** + * hisi_qm_regs_debugfs_init() - Allocate memory for registers. + * @qm: device qm handle. + * @dregs: diff registers handle. + * @reg_len: diff registers region length. + */ +int hisi_qm_regs_debugfs_init(struct hisi_qm *qm, + struct dfx_diff_registers *dregs, u32 reg_len) +{ + int ret; + + if (!qm || !dregs) + return -EINVAL; + + if (qm->fun_type != QM_HW_PF) + return 0; + + ret = qm_last_regs_init(qm); + if (ret) { + dev_info(&qm->pdev->dev, "failed to init qm words memory!\n"); + return ret; + } + + ret = qm_diff_regs_init(qm, dregs, reg_len); + if (ret) { + qm_last_regs_uninit(qm); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init); + +/** + * hisi_qm_regs_debugfs_uninit() - Free memory for registers. + * @qm: device qm handle. + * @reg_len: diff registers region length. + */ +void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len) +{ + if (!qm || qm->fun_type != QM_HW_PF) + return; + + qm_diff_regs_uninit(qm, reg_len); + qm_last_regs_uninit(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit); + +/** + * hisi_qm_acc_diff_regs_dump() - Dump registers's value. + * @qm: device qm handle. + * @s: Debugfs file handle. + * @dregs: diff registers handle. + * @regs_len: diff registers region length. + */ +void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, + struct dfx_diff_registers *dregs, u32 regs_len) +{ + u32 j, val, base_offset; + int i, ret; + + if (!qm || !s || !dregs) + return; + + ret = hisi_qm_get_dfx_access(qm); + if (ret) + return; + + down_read(&qm->qps_lock); + for (i = 0; i < regs_len; i++) { + if (!dregs[i].reg_len) + continue; + + for (j = 0; j < dregs[i].reg_len; j++) { + base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN; + val = readl(qm->io_base + base_offset); + if (val != dregs[i].regs[j]) + seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n", + base_offset, dregs[i].regs[j], val); + } + } + up_read(&qm->qps_lock); + + hisi_qm_put_dfx_access(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump); + +void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm) +{ + struct qm_debug *debug = &qm->debug; + struct pci_dev *pdev = qm->pdev; + u32 val; + int i; + + if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) + return; + + for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) { + val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset); + if (debug->qm_last_words[i] != val) + pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", + qm_dfx_regs[i].name, debug->qm_last_words[i], val); + } +} + +static int qm_diff_regs_show(struct seq_file *s, void *unused) +{ + struct hisi_qm *qm = s->private; + + hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs, + ARRAY_SIZE(qm_diff_regs)); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); + +static ssize_t qm_status_read(struct file *filp, char __user *buffer, + size_t count, loff_t *pos) +{ + struct hisi_qm *qm = filp->private_data; + char buf[QM_DBG_READ_LEN]; + int val, len; + + val = atomic_read(&qm->status.flags); + len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]); + + return simple_read_from_buffer(buffer, count, pos, buf, len); +} + +static const struct file_operations qm_status_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = qm_status_read, +}; + +static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, + enum qm_debug_file index) +{ + struct debugfs_file *file = qm->debug.files + index; + + debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, + &qm_debug_fops); + + file->index = index; + mutex_init(&file->lock); + file->debug = &qm->debug; +} + +static int qm_debugfs_atomic64_set(void *data, u64 val) +{ + if (val) + return -EINVAL; + + atomic64_set((atomic64_t *)data, 0); + + return 0; +} + +static int qm_debugfs_atomic64_get(void *data, u64 *val) +{ + *val = atomic64_read((atomic64_t *)data); + + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, + qm_debugfs_atomic64_set, "%llu\n"); + +/** + * hisi_qm_debug_init() - Initialize qm related debugfs files. + * @qm: The qm for which we want to add debugfs files. + * + * Create qm related debugfs files. + */ +void hisi_qm_debug_init(struct hisi_qm *qm) +{ + struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; + struct qm_dfx *dfx = &qm->debug.dfx; + struct dentry *qm_d; + void *data; + int i; + + qm_d = debugfs_create_dir("qm", qm->debug.debug_root); + qm->debug.qm_d = qm_d; + + /* only show this in PF */ + if (qm->fun_type == QM_HW_PF) { + qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); + for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) + qm_create_debugfs_file(qm, qm->debug.qm_d, i); + } + + if (qm_regs) + debugfs_create_file("diff_regs", 0444, qm->debug.qm_d, + qm, &qm_diff_regs_fops); + + debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); + + debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); + + debugfs_create_file("status", 0444, qm->debug.qm_d, qm, + &qm_status_fops); + for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { + data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); + debugfs_create_file(qm_dfx_files[i].name, + 0644, + qm_d, + data, + &qm_atomic64_ops); + } + + if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) + hisi_qm_set_algqos_init(qm); +} +EXPORT_SYMBOL_GPL(hisi_qm_debug_init); + +/** + * hisi_qm_debug_regs_clear() - clear qm debug related registers. + * @qm: The qm for which we want to clear its debug registers. + */ +void hisi_qm_debug_regs_clear(struct hisi_qm *qm) +{ + const struct debugfs_reg32 *regs; + int i; + + /* clear current_qm */ + writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF); + writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF); + + /* clear current_q */ + writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); + writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); + + /* + * these registers are reading and clearing, so clear them after + * reading them. + */ + writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE); + + regs = qm_dfx_regs; + for (i = 0; i < CNT_CYC_REGS_NUM; i++) { + readl(qm->io_base + regs->offset); + regs++; + } + + /* clear clear_enable */ + writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE); +} +EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c index ef02dadd6217098fe963416973e82a00db3970c4..8ede77310dc528c542c1d12dea0377ed9157c11f 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c +++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c @@ -147,6 +147,16 @@ struct hpre_asym_request { struct timespec64 req_time; }; +static inline unsigned int hpre_align_sz(void) +{ + return ((crypto_dma_align() - 1) | (HPRE_ALIGN_SZ - 1)) + 1; +} + +static inline unsigned int hpre_align_pd(void) +{ + return (hpre_align_sz() - 1) & ~(crypto_tfm_ctx_alignment() - 1); +} + static int hpre_alloc_req_id(struct hpre_ctx *ctx) { unsigned long flags; @@ -517,7 +527,7 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa) } tmp = akcipher_request_ctx(akreq); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_rsa_cb; h_req->areq.rsa = akreq; msg = &h_req->req; @@ -531,7 +541,7 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa) } tmp = kpp_request_ctx(kreq); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_dh_cb; h_req->areq.dh = kreq; msg = &h_req->req; @@ -582,7 +592,7 @@ static int hpre_dh_compute_value(struct kpp_request *req) struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); void *tmp = kpp_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -740,6 +750,8 @@ static int hpre_dh_init_tfm(struct crypto_kpp *tfm) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); + return hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE); } @@ -783,7 +795,7 @@ static int hpre_rsa_enc(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm); void *tmp = akcipher_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -831,7 +843,7 @@ static int hpre_rsa_dec(struct akcipher_request *req) struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm); void *tmp = akcipher_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -1165,6 +1177,9 @@ static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm) return PTR_ERR(ctx->rsa.soft_tfm); } + akcipher_set_reqsize(tfm, sizeof(struct hpre_asym_request) + + hpre_align_pd()); + ret = hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE); if (ret) crypto_free_akcipher(ctx->rsa.soft_tfm); @@ -1485,7 +1500,7 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx, } tmp = kpp_request_ctx(req); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_ecdh_cb; h_req->areq.ecdh = req; msg = &h_req->req; @@ -1566,7 +1581,7 @@ static int hpre_ecdh_compute_value(struct kpp_request *req) struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = ctx->dev; void *tmp = kpp_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -1617,6 +1632,8 @@ static int hpre_ecdh_nist_p192_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P192; + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1626,6 +1643,8 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P256; + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1635,6 +1654,8 @@ static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm) ctx->curve_id = ECC_CURVE_NIST_P384; + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1791,7 +1812,7 @@ static int hpre_curve25519_msg_request_set(struct hpre_ctx *ctx, } tmp = kpp_request_ctx(req); - h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + h_req = PTR_ALIGN(tmp, hpre_align_sz()); h_req->cb = hpre_curve25519_cb; h_req->areq.curve25519 = req; msg = &h_req->req; @@ -1912,7 +1933,7 @@ static int hpre_curve25519_compute_value(struct kpp_request *req) struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); struct device *dev = ctx->dev; void *tmp = kpp_request_ctx(req); - struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ); + struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz()); struct hpre_sqe *msg = &hpre_req->req; int ret; @@ -1961,6 +1982,8 @@ static int hpre_curve25519_init_tfm(struct crypto_kpp *tfm) { struct hpre_ctx *ctx = kpp_tfm_ctx(tfm); + kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd()); + return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE); } @@ -1981,7 +2004,6 @@ static struct akcipher_alg rsa = { .max_size = hpre_rsa_max_size, .init = hpre_rsa_init_tfm, .exit = hpre_rsa_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -1998,7 +2020,6 @@ static struct kpp_alg dh = { .max_size = hpre_dh_max_size, .init = hpre_dh_init_tfm, .exit = hpre_dh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2016,7 +2037,6 @@ static struct kpp_alg ecdh_curves[] = { .max_size = hpre_ecdh_max_size, .init = hpre_ecdh_nist_p192_init_tfm, .exit = hpre_ecdh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2031,7 +2051,6 @@ static struct kpp_alg ecdh_curves[] = { .max_size = hpre_ecdh_max_size, .init = hpre_ecdh_nist_p256_init_tfm, .exit = hpre_ecdh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2046,7 +2065,6 @@ static struct kpp_alg ecdh_curves[] = { .max_size = hpre_ecdh_max_size, .init = hpre_ecdh_nist_p384_init_tfm, .exit = hpre_ecdh_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, @@ -2064,7 +2082,6 @@ static struct kpp_alg curve25519_alg = { .max_size = hpre_curve25519_max_size, .init = hpre_curve25519_init_tfm, .exit = hpre_curve25519_exit_tfm, - .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ, .base = { .cra_ctxsize = sizeof(struct hpre_ctx), .cra_priority = HPRE_CRYPTO_ALG_PRI, diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c index 471e5ca720f5781551cffe39744deb0c800d15ac..923f9c2792654f7e2ed739e6041159ba46f6d250 100644 --- a/drivers/crypto/hisilicon/hpre/hpre_main.c +++ b/drivers/crypto/hisilicon/hpre/hpre_main.c @@ -1101,8 +1101,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN; - ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs, - ARRAY_SIZE(hpre_diff_regs)); + ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs)); if (ret) { dev_warn(dev, "Failed to init HPRE diff regs!\n"); goto debugfs_remove; @@ -1121,7 +1120,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); debugfs_remove: debugfs_remove_recursive(qm->debug.debug_root); return ret; @@ -1129,7 +1128,7 @@ debugfs_remove: static void hpre_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs)); debugfs_remove_recursive(qm->debug.debug_root); } @@ -1437,18 +1436,12 @@ err_with_qm_init: static void hpre_remove(struct pci_dev *pdev) { struct hisi_qm *qm = pci_get_drvdata(pdev); - int ret; hisi_qm_pm_uninit(qm); hisi_qm_wait_task_finish(qm, &hpre_devices); hisi_qm_alg_unregister(qm, &hpre_devices); - if (qm->fun_type == QM_HW_PF && qm->vfs_num) { - ret = hisi_qm_sriov_disable(pdev, true); - if (ret) { - pci_err(pdev, "Disable SRIOV fail!\n"); - return; - } - } + if (qm->fun_type == QM_HW_PF && qm->vfs_num) + hisi_qm_sriov_disable(pdev, true); hpre_debugfs_exit(qm); hisi_qm_stop(qm, QM_NORMAL); diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c index 8b387de69d229b7cb6059a6bc5094e277a4be16b..007ac7a69ce74759c2a973cf38c6d3894097b4b0 100644 --- a/drivers/crypto/hisilicon/qm.c +++ b/drivers/crypto/hisilicon/qm.c @@ -16,6 +16,7 @@ #include #include #include +#include "qm_common.h" /* eq/aeq irq enable */ #define QM_VF_AEQ_INT_SOURCE 0x0 @@ -119,8 +120,6 @@ #define QM_SQC_VFT_NUM_SHIFT_V2 45 #define QM_SQC_VFT_NUM_MASK_v2 GENMASK(9, 0) -#define QM_DFX_CNT_CLR_CE 0x100118 - #define QM_ABNORMAL_INT_SOURCE 0x100000 #define QM_ABNORMAL_INT_MASK 0x100004 #define QM_ABNORMAL_INT_MASK_VALUE 0x7fff @@ -187,14 +186,6 @@ #define QM_VF_RESET_WAIT_TIMEOUT_US \ (QM_VF_RESET_WAIT_US * QM_VF_RESET_WAIT_CNT) -#define QM_DFX_MB_CNT_VF 0x104010 -#define QM_DFX_DB_CNT_VF 0x104020 -#define QM_DFX_SQE_CNT_VF_SQN 0x104030 -#define QM_DFX_CQE_CNT_VF_CQN 0x104040 -#define QM_DFX_QN_SHIFT 16 -#define CURRENT_FUN_MASK GENMASK(5, 0) -#define CURRENT_Q_MASK GENMASK(31, 16) - #define POLL_PERIOD 10 #define POLL_TIMEOUT 1000 #define WAIT_PERIOD_US_MAX 200 @@ -211,19 +202,15 @@ #define QMC_ALIGN(sz) ALIGN(sz, 32) #define QM_DBG_READ_LEN 256 -#define QM_DBG_WRITE_LEN 1024 -#define QM_DBG_TMP_BUF_LEN 22 #define QM_PCI_COMMAND_INVALID ~0 #define QM_RESET_STOP_TX_OFFSET 1 #define QM_RESET_STOP_RX_OFFSET 2 #define WAIT_PERIOD 20 #define REMOVE_WAIT_DELAY 10 -#define QM_SQE_ADDR_MASK GENMASK(7, 0) #define QM_DRIVER_REMOVING 0 #define QM_RST_SCHED 1 -#define QM_RESETTING 2 #define QM_QOS_PARAM_NUM 2 #define QM_QOS_VAL_NUM 1 #define QM_QOS_BDF_PARAM_NUM 4 @@ -250,16 +237,6 @@ #define QM_QOS_MIN_CIR_B 100 #define QM_QOS_MAX_CIR_U 6 #define QM_QOS_MAX_CIR_S 11 -#define QM_QOS_VAL_MAX_LEN 32 -#define QM_DFX_BASE 0x0100000 -#define QM_DFX_STATE1 0x0104000 -#define QM_DFX_STATE2 0x01040C8 -#define QM_DFX_COMMON 0x0000 -#define QM_DFX_BASE_LEN 0x5A -#define QM_DFX_STATE1_LEN 0x2E -#define QM_DFX_STATE2_LEN 0x11 -#define QM_DFX_COMMON_LEN 0xC3 -#define QM_DFX_REGS_LEN 4UL #define QM_AUTOSUSPEND_DELAY 3000 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \ @@ -359,7 +336,7 @@ static const struct hisi_qm_cap_info qm_cap_info_vf[] = { static const struct hisi_qm_cap_info qm_basic_info[] = { {QM_TOTAL_QP_NUM_CAP, 0x100158, 0, GENMASK(10, 0), 0x1000, 0x400, 0x400}, {QM_FUNC_MAX_QP_CAP, 0x100158, 11, GENMASK(10, 0), 0x1000, 0x400, 0x400}, - {QM_XEQ_DEPTH_CAP, 0x3104, 0, GENMASK(15, 0), 0x800, 0x4000800, 0x4000800}, + {QM_XEQ_DEPTH_CAP, 0x3104, 0, GENMASK(31, 0), 0x800, 0x4000800, 0x4000800}, {QM_QP_DEPTH_CAP, 0x3108, 0, GENMASK(31, 0), 0x4000400, 0x4000400, 0x4000400}, {QM_EQ_IRQ_TYPE_CAP, 0x310c, 0, GENMASK(31, 0), 0x10000, 0x10000, 0x10000}, {QM_AEQ_IRQ_TYPE_CAP, 0x3110, 0, GENMASK(31, 0), 0x0, 0x10001, 0x10001}, @@ -369,73 +346,6 @@ static const struct hisi_qm_cap_info qm_basic_info[] = { {QM_VF_IRQ_NUM_CAP, 0x311c, 0, GENMASK(15, 0), 0x1, 0x2, 0x3}, }; -struct qm_cqe { - __le32 rsvd0; - __le16 cmd_id; - __le16 rsvd1; - __le16 sq_head; - __le16 sq_num; - __le16 rsvd2; - __le16 w7; -}; - -struct qm_eqe { - __le32 dw0; -}; - -struct qm_aeqe { - __le32 dw0; -}; - -struct qm_sqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le16 w8; - __le16 rsvd0; - __le16 pasid; - __le16 w11; - __le16 cq_num; - __le16 w13; - __le32 rsvd1; -}; - -struct qm_cqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le16 w8; - __le16 rsvd0; - __le16 pasid; - __le16 w11; - __le32 dw6; - __le32 rsvd1; -}; - -struct qm_eqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le32 rsvd[2]; - __le32 dw6; -}; - -struct qm_aeqc { - __le16 head; - __le16 tail; - __le32 base_l; - __le32 base_h; - __le32 dw3; - __le32 rsvd[2]; - __le32 dw6; -}; - struct qm_mailbox { __le16 w0; __le16 queue_num; @@ -468,25 +378,6 @@ struct hisi_qm_hw_ops { int (*set_msi)(struct hisi_qm *qm, bool set); }; -struct qm_dfx_item { - const char *name; - u32 offset; -}; - -static struct qm_dfx_item qm_dfx_files[] = { - {"err_irq", offsetof(struct qm_dfx, err_irq_cnt)}, - {"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)}, - {"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)}, - {"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)}, - {"mb_err", offsetof(struct qm_dfx, mb_err_cnt)}, -}; - -static const char * const qm_debug_file_name[] = { - [CURRENT_QM] = "current_qm", - [CURRENT_Q] = "current_q", - [CLEAR_ENABLE] = "clear_enable", -}; - struct hisi_qm_hw_error { u32 int_msk; const char *msg; @@ -511,23 +402,6 @@ static const struct hisi_qm_hw_error qm_hw_error[] = { { /* sentinel */ } }; -/* define the QM's dfx regs region and region length */ -static struct dfx_diff_registers qm_diff_regs[] = { - { - .reg_offset = QM_DFX_BASE, - .reg_len = QM_DFX_BASE_LEN, - }, { - .reg_offset = QM_DFX_STATE1, - .reg_len = QM_DFX_STATE1_LEN, - }, { - .reg_offset = QM_DFX_STATE2, - .reg_len = QM_DFX_STATE2_LEN, - }, { - .reg_offset = QM_DFX_COMMON, - .reg_len = QM_DFX_COMMON_LEN, - }, -}; - static const char * const qm_db_timeout[] = { "sq", "cq", "eq", "aeq", }; @@ -536,10 +410,6 @@ static const char * const qm_fifo_overflow[] = { "cq", "eq", "aeq", }; -static const char * const qm_s[] = { - "init", "start", "close", "stop", -}; - static const char * const qp_s[] = { "none", "init", "start", "stop", "close", }; @@ -909,8 +779,8 @@ static void qm_get_xqc_depth(struct hisi_qm *qm, u16 *low_bits, u32 depth; depth = hisi_qm_get_hw_info(qm, qm_basic_info, type, qm->cap_ver); - *high_bits = depth & QM_XQ_DEPTH_MASK; - *low_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK; + *low_bits = depth & QM_XQ_DEPTH_MASK; + *high_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK; } static u32 qm_get_irq_num(struct hisi_qm *qm) @@ -1328,996 +1198,155 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base, case SHAPER_VFT: if (factor) { tmp = factor->cir_b | - (factor->cir_u << QM_SHAPER_FACTOR_CIR_U_SHIFT) | - (factor->cir_s << QM_SHAPER_FACTOR_CIR_S_SHIFT) | - (QM_SHAPER_CBS_B << QM_SHAPER_FACTOR_CBS_B_SHIFT) | - (factor->cbs_s << QM_SHAPER_FACTOR_CBS_S_SHIFT); - } - break; - } - } - - writel(lower_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_L); - writel(upper_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_H); -} - -static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, - u32 fun_num, u32 base, u32 number) -{ - struct qm_shaper_factor *factor = NULL; - unsigned int val; - int ret; - - if (type == SHAPER_VFT && test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) - factor = &qm->factor[fun_num]; - - ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, - val & BIT(0), POLL_PERIOD, - POLL_TIMEOUT); - if (ret) - return ret; - - writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR); - writel(type, qm->io_base + QM_VFT_CFG_TYPE); - if (type == SHAPER_VFT) - fun_num |= base << QM_SHAPER_VFT_OFFSET; - - writel(fun_num, qm->io_base + QM_VFT_CFG); - - qm_vft_data_cfg(qm, type, base, number, factor); - - writel(0x0, qm->io_base + QM_VFT_CFG_RDY); - writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE); - - return readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, - val & BIT(0), POLL_PERIOD, - POLL_TIMEOUT); -} - -static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num) -{ - u32 qos = qm->factor[fun_num].func_qos; - int ret, i; - - ret = qm_get_shaper_para(qos * QM_QOS_RATE, &qm->factor[fun_num]); - if (ret) { - dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n"); - return ret; - } - writel(qm->type_rate, qm->io_base + QM_SHAPER_CFG); - for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) { - /* The base number of queue reuse for different alg type */ - ret = qm_set_vft_common(qm, SHAPER_VFT, fun_num, i, 1); - if (ret) - return ret; - } - - return 0; -} - -/* The config should be conducted after qm_dev_mem_reset() */ -static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base, - u32 number) -{ - int ret, i; - - for (i = SQC_VFT; i <= CQC_VFT; i++) { - ret = qm_set_vft_common(qm, i, fun_num, base, number); - if (ret) - return ret; - } - - /* init default shaper qos val */ - if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) { - ret = qm_shaper_init_vft(qm, fun_num); - if (ret) - goto back_sqc_cqc; - } - - return 0; -back_sqc_cqc: - for (i = SQC_VFT; i <= CQC_VFT; i++) - qm_set_vft_common(qm, i, fun_num, 0, 0); - - return ret; -} - -static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) -{ - u64 sqc_vft; - int ret; - - ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1); - if (ret) - return ret; - - sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | - ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32); - *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); - *number = (QM_SQC_VFT_NUM_MASK_v2 & - (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; - - return 0; -} - -static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num) -{ - u32 remain_q_num, vfq_num; - u32 num_vfs = qm->vfs_num; - - vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs; - if (vfq_num >= qm->max_qp_num) - return qm->max_qp_num; - - remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs; - if (vfq_num + remain_q_num <= qm->max_qp_num) - return fun_num == num_vfs ? vfq_num + remain_q_num : vfq_num; - - /* - * if vfq_num + remain_q_num > max_qp_num, the last VFs, - * each with one more queue. - */ - return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num; -} - -static struct hisi_qm *file_to_qm(struct debugfs_file *file) -{ - struct qm_debug *debug = file->debug; - - return container_of(debug, struct hisi_qm, debug); -} - -static u32 current_q_read(struct hisi_qm *qm) -{ - return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT; -} - -static int current_q_write(struct hisi_qm *qm, u32 val) -{ - u32 tmp; - - if (val >= qm->debug.curr_qm_qp_num) - return -EINVAL; - - tmp = val << QM_DFX_QN_SHIFT | - (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK); - writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); - - tmp = val << QM_DFX_QN_SHIFT | - (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK); - writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); - - return 0; -} - -static u32 clear_enable_read(struct hisi_qm *qm) -{ - return readl(qm->io_base + QM_DFX_CNT_CLR_CE); -} - -/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */ -static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl) -{ - if (rd_clr_ctrl > 1) - return -EINVAL; - - writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE); - - return 0; -} - -static u32 current_qm_read(struct hisi_qm *qm) -{ - return readl(qm->io_base + QM_DFX_MB_CNT_VF); -} - -static int current_qm_write(struct hisi_qm *qm, u32 val) -{ - u32 tmp; - - if (val > qm->vfs_num) - return -EINVAL; - - /* According PF or VF Dev ID to calculation curr_qm_qp_num and store */ - if (!val) - qm->debug.curr_qm_qp_num = qm->qp_num; - else - qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val); - - writel(val, qm->io_base + QM_DFX_MB_CNT_VF); - writel(val, qm->io_base + QM_DFX_DB_CNT_VF); - - tmp = val | - (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK); - writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); - - tmp = val | - (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK); - writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); - - return 0; -} - -static ssize_t qm_debug_read(struct file *filp, char __user *buf, - size_t count, loff_t *pos) -{ - struct debugfs_file *file = filp->private_data; - enum qm_debug_file index = file->index; - struct hisi_qm *qm = file_to_qm(file); - char tbuf[QM_DBG_TMP_BUF_LEN]; - u32 val; - int ret; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return ret; - - mutex_lock(&file->lock); - switch (index) { - case CURRENT_QM: - val = current_qm_read(qm); - break; - case CURRENT_Q: - val = current_q_read(qm); - break; - case CLEAR_ENABLE: - val = clear_enable_read(qm); - break; - default: - goto err_input; - } - mutex_unlock(&file->lock); - - hisi_qm_put_dfx_access(qm); - ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val); - return simple_read_from_buffer(buf, count, pos, tbuf, ret); - -err_input: - mutex_unlock(&file->lock); - hisi_qm_put_dfx_access(qm); - return -EINVAL; -} - -static ssize_t qm_debug_write(struct file *filp, const char __user *buf, - size_t count, loff_t *pos) -{ - struct debugfs_file *file = filp->private_data; - enum qm_debug_file index = file->index; - struct hisi_qm *qm = file_to_qm(file); - unsigned long val; - char tbuf[QM_DBG_TMP_BUF_LEN]; - int len, ret; - - if (*pos != 0) - return 0; - - if (count >= QM_DBG_TMP_BUF_LEN) - return -ENOSPC; - - len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf, - count); - if (len < 0) - return len; - - tbuf[len] = '\0'; - if (kstrtoul(tbuf, 0, &val)) - return -EFAULT; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return ret; - - mutex_lock(&file->lock); - switch (index) { - case CURRENT_QM: - ret = current_qm_write(qm, val); - break; - case CURRENT_Q: - ret = current_q_write(qm, val); - break; - case CLEAR_ENABLE: - ret = clear_enable_write(qm, val); - break; - default: - ret = -EINVAL; - } - mutex_unlock(&file->lock); - - hisi_qm_put_dfx_access(qm); - - if (ret) - return ret; - - return count; -} - -static const struct file_operations qm_debug_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = qm_debug_read, - .write = qm_debug_write, -}; - -#define CNT_CYC_REGS_NUM 10 -static const struct debugfs_reg32 qm_dfx_regs[] = { - /* XXX_CNT are reading clear register */ - {"QM_ECC_1BIT_CNT ", 0x104000ull}, - {"QM_ECC_MBIT_CNT ", 0x104008ull}, - {"QM_DFX_MB_CNT ", 0x104018ull}, - {"QM_DFX_DB_CNT ", 0x104028ull}, - {"QM_DFX_SQE_CNT ", 0x104038ull}, - {"QM_DFX_CQE_CNT ", 0x104048ull}, - {"QM_DFX_SEND_SQE_TO_ACC_CNT ", 0x104050ull}, - {"QM_DFX_WB_SQE_FROM_ACC_CNT ", 0x104058ull}, - {"QM_DFX_ACC_FINISH_CNT ", 0x104060ull}, - {"QM_DFX_CQE_ERR_CNT ", 0x1040b4ull}, - {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, - {"QM_ECC_1BIT_INF ", 0x104004ull}, - {"QM_ECC_MBIT_INF ", 0x10400cull}, - {"QM_DFX_ACC_RDY_VLD0 ", 0x1040a0ull}, - {"QM_DFX_ACC_RDY_VLD1 ", 0x1040a4ull}, - {"QM_DFX_AXI_RDY_VLD ", 0x1040a8ull}, - {"QM_DFX_FF_ST0 ", 0x1040c8ull}, - {"QM_DFX_FF_ST1 ", 0x1040ccull}, - {"QM_DFX_FF_ST2 ", 0x1040d0ull}, - {"QM_DFX_FF_ST3 ", 0x1040d4ull}, - {"QM_DFX_FF_ST4 ", 0x1040d8ull}, - {"QM_DFX_FF_ST5 ", 0x1040dcull}, - {"QM_DFX_FF_ST6 ", 0x1040e0ull}, - {"QM_IN_IDLE_ST ", 0x1040e4ull}, -}; - -static const struct debugfs_reg32 qm_vf_dfx_regs[] = { - {"QM_DFX_FUNS_ACTIVE_ST ", 0x200ull}, -}; - -/** - * hisi_qm_regs_dump() - Dump registers's value. - * @s: debugfs file handle. - * @regset: accelerator registers information. - * - * Dump accelerator registers. - */ -void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset) -{ - struct pci_dev *pdev = to_pci_dev(regset->dev); - struct hisi_qm *qm = pci_get_drvdata(pdev); - const struct debugfs_reg32 *regs = regset->regs; - int regs_len = regset->nregs; - int i, ret; - u32 val; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return; - - for (i = 0; i < regs_len; i++) { - val = readl(regset->base + regs[i].offset); - seq_printf(s, "%s= 0x%08x\n", regs[i].name, val); - } - - hisi_qm_put_dfx_access(qm); -} -EXPORT_SYMBOL_GPL(hisi_qm_regs_dump); - -static int qm_regs_show(struct seq_file *s, void *unused) -{ - struct hisi_qm *qm = s->private; - struct debugfs_regset32 regset; - - if (qm->fun_type == QM_HW_PF) { - regset.regs = qm_dfx_regs; - regset.nregs = ARRAY_SIZE(qm_dfx_regs); - } else { - regset.regs = qm_vf_dfx_regs; - regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs); - } - - regset.base = qm->io_base; - regset.dev = &qm->pdev->dev; - - hisi_qm_regs_dump(s, ®set); - - return 0; -} - -DEFINE_SHOW_ATTRIBUTE(qm_regs); - -static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm, - const struct dfx_diff_registers *cregs, int reg_len) -{ - struct dfx_diff_registers *diff_regs; - u32 j, base_offset; - int i; - - diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL); - if (!diff_regs) - return ERR_PTR(-ENOMEM); - - for (i = 0; i < reg_len; i++) { - if (!cregs[i].reg_len) - continue; - - diff_regs[i].reg_offset = cregs[i].reg_offset; - diff_regs[i].reg_len = cregs[i].reg_len; - diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len, - GFP_KERNEL); - if (!diff_regs[i].regs) - goto alloc_error; - - for (j = 0; j < diff_regs[i].reg_len; j++) { - base_offset = diff_regs[i].reg_offset + - j * QM_DFX_REGS_LEN; - diff_regs[i].regs[j] = readl(qm->io_base + base_offset); - } - } - - return diff_regs; - -alloc_error: - while (i > 0) { - i--; - kfree(diff_regs[i].regs); - } - kfree(diff_regs); - return ERR_PTR(-ENOMEM); -} - -static void dfx_regs_uninit(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, int reg_len) -{ - int i; - - /* Setting the pointer is NULL to prevent double free */ - for (i = 0; i < reg_len; i++) { - kfree(dregs[i].regs); - dregs[i].regs = NULL; - } - kfree(dregs); - dregs = NULL; -} - -/** - * hisi_qm_diff_regs_init() - Allocate memory for registers. - * @qm: device qm handle. - * @dregs: diff registers handle. - * @reg_len: diff registers region length. - */ -int hisi_qm_diff_regs_init(struct hisi_qm *qm, - struct dfx_diff_registers *dregs, int reg_len) -{ - if (!qm || !dregs || reg_len <= 0) - return -EINVAL; - - if (qm->fun_type != QM_HW_PF) - return 0; - - qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, - ARRAY_SIZE(qm_diff_regs)); - if (IS_ERR(qm->debug.qm_diff_regs)) - return PTR_ERR(qm->debug.qm_diff_regs); - - qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len); - if (IS_ERR(qm->debug.acc_diff_regs)) { - dfx_regs_uninit(qm, qm->debug.qm_diff_regs, - ARRAY_SIZE(qm_diff_regs)); - return PTR_ERR(qm->debug.acc_diff_regs); - } - - return 0; -} -EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init); - -/** - * hisi_qm_diff_regs_uninit() - Free memory for registers. - * @qm: device qm handle. - * @reg_len: diff registers region length. - */ -void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len) -{ - if (!qm || reg_len <= 0 || qm->fun_type != QM_HW_PF) - return; - - dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len); - dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs)); -} -EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit); - -/** - * hisi_qm_acc_diff_regs_dump() - Dump registers's value. - * @qm: device qm handle. - * @s: Debugfs file handle. - * @dregs: diff registers handle. - * @regs_len: diff registers region length. - */ -void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s, - struct dfx_diff_registers *dregs, int regs_len) -{ - u32 j, val, base_offset; - int i, ret; - - if (!qm || !s || !dregs || regs_len <= 0) - return; - - ret = hisi_qm_get_dfx_access(qm); - if (ret) - return; - - down_read(&qm->qps_lock); - for (i = 0; i < regs_len; i++) { - if (!dregs[i].reg_len) - continue; - - for (j = 0; j < dregs[i].reg_len; j++) { - base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN; - val = readl(qm->io_base + base_offset); - if (val != dregs[i].regs[j]) - seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n", - base_offset, dregs[i].regs[j], val); - } - } - up_read(&qm->qps_lock); - - hisi_qm_put_dfx_access(qm); -} -EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump); - -static int qm_diff_regs_show(struct seq_file *s, void *unused) -{ - struct hisi_qm *qm = s->private; - - hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs, - ARRAY_SIZE(qm_diff_regs)); - - return 0; -} -DEFINE_SHOW_ATTRIBUTE(qm_diff_regs); - -static ssize_t qm_cmd_read(struct file *filp, char __user *buffer, - size_t count, loff_t *pos) -{ - char buf[QM_DBG_READ_LEN]; - int len; - - len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", - "Please echo help to cmd to get help information"); - - return simple_read_from_buffer(buffer, count, pos, buf, len); -} - -static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, - dma_addr_t *dma_addr) -{ - struct device *dev = &qm->pdev->dev; - void *ctx_addr; - - ctx_addr = kzalloc(ctx_size, GFP_KERNEL); - if (!ctx_addr) - return ERR_PTR(-ENOMEM); - - *dma_addr = dma_map_single(dev, ctx_addr, ctx_size, DMA_FROM_DEVICE); - if (dma_mapping_error(dev, *dma_addr)) { - dev_err(dev, "DMA mapping error!\n"); - kfree(ctx_addr); - return ERR_PTR(-ENOMEM); - } - - return ctx_addr; -} - -static void qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, - const void *ctx_addr, dma_addr_t *dma_addr) -{ - struct device *dev = &qm->pdev->dev; - - dma_unmap_single(dev, *dma_addr, ctx_size, DMA_FROM_DEVICE); - kfree(ctx_addr); -} - -static void dump_show(struct hisi_qm *qm, void *info, - unsigned int info_size, char *info_name) -{ - struct device *dev = &qm->pdev->dev; - u8 *info_curr = info; - u32 i; -#define BYTE_PER_DW 4 - - dev_info(dev, "%s DUMP\n", info_name); - for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) { - pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW, - *(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr)); - } -} - -static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) -{ - return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1); -} - -static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) -{ - return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1); -} - -static int qm_sqc_dump(struct hisi_qm *qm, const char *s) -{ - struct device *dev = &qm->pdev->dev; - struct qm_sqc *sqc, *sqc_curr; - dma_addr_t sqc_dma; - u32 qp_id; - int ret; - - if (!s) - return -EINVAL; - - ret = kstrtou32(s, 0, &qp_id); - if (ret || qp_id >= qm->qp_num) { - dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); - return -EINVAL; - } - - sqc = qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma); - if (IS_ERR(sqc)) - return PTR_ERR(sqc); - - ret = qm_dump_sqc_raw(qm, sqc_dma, qp_id); - if (ret) { - down_read(&qm->qps_lock); - if (qm->sqc) { - sqc_curr = qm->sqc + qp_id; - - dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC"); - } - up_read(&qm->qps_lock); - - goto free_ctx; - } - - dump_show(qm, sqc, sizeof(*sqc), "SQC"); - -free_ctx: - qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma); - return 0; -} - -static int qm_cqc_dump(struct hisi_qm *qm, const char *s) -{ - struct device *dev = &qm->pdev->dev; - struct qm_cqc *cqc, *cqc_curr; - dma_addr_t cqc_dma; - u32 qp_id; - int ret; - - if (!s) - return -EINVAL; - - ret = kstrtou32(s, 0, &qp_id); - if (ret || qp_id >= qm->qp_num) { - dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1); - return -EINVAL; - } - - cqc = qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma); - if (IS_ERR(cqc)) - return PTR_ERR(cqc); - - ret = qm_dump_cqc_raw(qm, cqc_dma, qp_id); - if (ret) { - down_read(&qm->qps_lock); - if (qm->cqc) { - cqc_curr = qm->cqc + qp_id; - - dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC"); - } - up_read(&qm->qps_lock); - - goto free_ctx; - } - - dump_show(qm, cqc, sizeof(*cqc), "CQC"); - -free_ctx: - qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma); - return 0; -} - -static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size, - int cmd, char *name) -{ - struct device *dev = &qm->pdev->dev; - dma_addr_t xeqc_dma; - void *xeqc; - int ret; - - if (strsep(&s, " ")) { - dev_err(dev, "Please do not input extra characters!\n"); - return -EINVAL; - } - - xeqc = qm_ctx_alloc(qm, size, &xeqc_dma); - if (IS_ERR(xeqc)) - return PTR_ERR(xeqc); - - ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1); - if (ret) - goto err_free_ctx; - - dump_show(qm, xeqc, size, name); - -err_free_ctx: - qm_ctx_free(qm, size, xeqc, &xeqc_dma); - return ret; -} - -static int q_dump_param_parse(struct hisi_qm *qm, char *s, - u32 *e_id, u32 *q_id, u16 q_depth) -{ - struct device *dev = &qm->pdev->dev; - unsigned int qp_num = qm->qp_num; - char *presult; - int ret; - - presult = strsep(&s, " "); - if (!presult) { - dev_err(dev, "Please input qp number!\n"); - return -EINVAL; - } - - ret = kstrtou32(presult, 0, q_id); - if (ret || *q_id >= qp_num) { - dev_err(dev, "Please input qp num (0-%u)", qp_num - 1); - return -EINVAL; - } - - presult = strsep(&s, " "); - if (!presult) { - dev_err(dev, "Please input sqe number!\n"); - return -EINVAL; - } - - ret = kstrtou32(presult, 0, e_id); - if (ret || *e_id >= q_depth) { - dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1); - return -EINVAL; - } - - if (strsep(&s, " ")) { - dev_err(dev, "Please do not input extra characters!\n"); - return -EINVAL; - } - - return 0; -} - -static int qm_sq_dump(struct hisi_qm *qm, char *s) -{ - u16 sq_depth = qm->qp_array->cq_depth; - void *sqe, *sqe_curr; - struct hisi_qp *qp; - u32 qp_id, sqe_id; - int ret; - - ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth); - if (ret) - return ret; - - sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL); - if (!sqe) - return -ENOMEM; - - qp = &qm->qp_array[qp_id]; - memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth); - sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size); - memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK, - qm->debug.sqe_mask_len); - - dump_show(qm, sqe_curr, qm->sqe_size, "SQE"); - - kfree(sqe); - - return 0; -} - -static int qm_cq_dump(struct hisi_qm *qm, char *s) -{ - struct qm_cqe *cqe_curr; - struct hisi_qp *qp; - u32 qp_id, cqe_id; - int ret; - - ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth); - if (ret) - return ret; - - qp = &qm->qp_array[qp_id]; - cqe_curr = qp->cqe + cqe_id; - dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE"); + (factor->cir_u << QM_SHAPER_FACTOR_CIR_U_SHIFT) | + (factor->cir_s << QM_SHAPER_FACTOR_CIR_S_SHIFT) | + (QM_SHAPER_CBS_B << QM_SHAPER_FACTOR_CBS_B_SHIFT) | + (factor->cbs_s << QM_SHAPER_FACTOR_CBS_S_SHIFT); + } + break; + } + } - return 0; + writel(lower_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_L); + writel(upper_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_H); } -static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s, - size_t size, char *name) +static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type, + u32 fun_num, u32 base, u32 number) { - struct device *dev = &qm->pdev->dev; - void *xeqe; - u32 xeqe_id; + struct qm_shaper_factor *factor = NULL; + unsigned int val; int ret; - if (!s) - return -EINVAL; + if (type == SHAPER_VFT && test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) + factor = &qm->factor[fun_num]; - ret = kstrtou32(s, 0, &xeqe_id); + ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); if (ret) - return -EINVAL; + return ret; - if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) { - dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1); - return -EINVAL; - } else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) { - dev_err(dev, "Please input aeqe num (0-%u)", qm->eq_depth - 1); - return -EINVAL; - } + writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR); + writel(type, qm->io_base + QM_VFT_CFG_TYPE); + if (type == SHAPER_VFT) + fun_num |= base << QM_SHAPER_VFT_OFFSET; - down_read(&qm->qps_lock); + writel(fun_num, qm->io_base + QM_VFT_CFG); - if (qm->eqe && !strcmp(name, "EQE")) { - xeqe = qm->eqe + xeqe_id; - } else if (qm->aeqe && !strcmp(name, "AEQE")) { - xeqe = qm->aeqe + xeqe_id; - } else { - ret = -EINVAL; - goto err_unlock; - } + qm_vft_data_cfg(qm, type, base, number, factor); - dump_show(qm, xeqe, size, name); + writel(0x0, qm->io_base + QM_VFT_CFG_RDY); + writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE); -err_unlock: - up_read(&qm->qps_lock); - return ret; + return readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val, + val & BIT(0), POLL_PERIOD, + POLL_TIMEOUT); } -static int qm_dbg_help(struct hisi_qm *qm, char *s) +static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num) { - struct device *dev = &qm->pdev->dev; + u32 qos = qm->factor[fun_num].func_qos; + int ret, i; - if (strsep(&s, " ")) { - dev_err(dev, "Please do not input extra characters!\n"); - return -EINVAL; + ret = qm_get_shaper_para(qos * QM_QOS_RATE, &qm->factor[fun_num]); + if (ret) { + dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n"); + return ret; + } + writel(qm->type_rate, qm->io_base + QM_SHAPER_CFG); + for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) { + /* The base number of queue reuse for different alg type */ + ret = qm_set_vft_common(qm, SHAPER_VFT, fun_num, i, 1); + if (ret) + return ret; } - - dev_info(dev, "available commands:\n"); - dev_info(dev, "sqc \n"); - dev_info(dev, "cqc \n"); - dev_info(dev, "eqc\n"); - dev_info(dev, "aeqc\n"); - dev_info(dev, "sq \n"); - dev_info(dev, "cq \n"); - dev_info(dev, "eq \n"); - dev_info(dev, "aeq \n"); return 0; } -static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf) +/* The config should be conducted after qm_dev_mem_reset() */ +static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base, + u32 number) { - struct device *dev = &qm->pdev->dev; - char *presult, *s, *s_tmp; - int ret; - - s = kstrdup(cmd_buf, GFP_KERNEL); - if (!s) - return -ENOMEM; + int ret, i; - s_tmp = s; - presult = strsep(&s, " "); - if (!presult) { - ret = -EINVAL; - goto err_buffer_free; - } - - if (!strcmp(presult, "sqc")) - ret = qm_sqc_dump(qm, s); - else if (!strcmp(presult, "cqc")) - ret = qm_cqc_dump(qm, s); - else if (!strcmp(presult, "eqc")) - ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc), - QM_MB_CMD_EQC, "EQC"); - else if (!strcmp(presult, "aeqc")) - ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc), - QM_MB_CMD_AEQC, "AEQC"); - else if (!strcmp(presult, "sq")) - ret = qm_sq_dump(qm, s); - else if (!strcmp(presult, "cq")) - ret = qm_cq_dump(qm, s); - else if (!strcmp(presult, "eq")) - ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE"); - else if (!strcmp(presult, "aeq")) - ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE"); - else if (!strcmp(presult, "help")) - ret = qm_dbg_help(qm, s); - else - ret = -EINVAL; + for (i = SQC_VFT; i <= CQC_VFT; i++) { + ret = qm_set_vft_common(qm, i, fun_num, base, number); + if (ret) + return ret; + } - if (ret) - dev_info(dev, "Please echo help\n"); + /* init default shaper qos val */ + if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) { + ret = qm_shaper_init_vft(qm, fun_num); + if (ret) + goto back_sqc_cqc; + } -err_buffer_free: - kfree(s_tmp); + return 0; +back_sqc_cqc: + for (i = SQC_VFT; i <= CQC_VFT; i++) + qm_set_vft_common(qm, i, fun_num, 0, 0); return ret; } -static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer, - size_t count, loff_t *pos) +static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number) { - struct hisi_qm *qm = filp->private_data; - char *cmd_buf, *cmd_buf_tmp; + u64 sqc_vft; int ret; - if (*pos) - return 0; - - ret = hisi_qm_get_dfx_access(qm); + ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1); if (ret) return ret; - /* Judge if the instance is being reset. */ - if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) { - ret = 0; - goto put_dfx_access; - } + sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) | + ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32); + *base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2); + *number = (QM_SQC_VFT_NUM_MASK_v2 & + (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1; - if (count > QM_DBG_WRITE_LEN) { - ret = -ENOSPC; - goto put_dfx_access; - } + return 0; +} - cmd_buf = memdup_user_nul(buffer, count); - if (IS_ERR(cmd_buf)) { - ret = PTR_ERR(cmd_buf); - goto put_dfx_access; - } +void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, + dma_addr_t *dma_addr) +{ + struct device *dev = &qm->pdev->dev; + void *ctx_addr; - cmd_buf_tmp = strchr(cmd_buf, '\n'); - if (cmd_buf_tmp) { - *cmd_buf_tmp = '\0'; - count = cmd_buf_tmp - cmd_buf + 1; - } + ctx_addr = kzalloc(ctx_size, GFP_KERNEL); + if (!ctx_addr) + return ERR_PTR(-ENOMEM); - ret = qm_cmd_write_dump(qm, cmd_buf); - if (ret) { - kfree(cmd_buf); - goto put_dfx_access; + *dma_addr = dma_map_single(dev, ctx_addr, ctx_size, DMA_FROM_DEVICE); + if (dma_mapping_error(dev, *dma_addr)) { + dev_err(dev, "DMA mapping error!\n"); + kfree(ctx_addr); + return ERR_PTR(-ENOMEM); } - kfree(cmd_buf); + return ctx_addr; +} - ret = count; +void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, + const void *ctx_addr, dma_addr_t *dma_addr) +{ + struct device *dev = &qm->pdev->dev; -put_dfx_access: - hisi_qm_put_dfx_access(qm); - return ret; + dma_unmap_single(dev, *dma_addr, ctx_size, DMA_FROM_DEVICE); + kfree(ctx_addr); } -static const struct file_operations qm_cmd_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = qm_cmd_read, - .write = qm_cmd_write, -}; - -static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir, - enum qm_debug_file index) +static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) { - struct debugfs_file *file = qm->debug.files + index; - - debugfs_create_file(qm_debug_file_name[index], 0600, dir, file, - &qm_debug_fops); + return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1); +} - file->index = index; - mutex_init(&file->lock); - file->debug = &qm->debug; +static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id) +{ + return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1); } static void qm_hw_error_init_v1(struct hisi_qm *qm) @@ -3101,7 +2130,7 @@ static int qm_drain_qp(struct hisi_qp *qp) return ret; } - addr = qm_ctx_alloc(qm, size, &dma_addr); + addr = hisi_qm_ctx_alloc(qm, size, &dma_addr); if (IS_ERR(addr)) { dev_err(dev, "Failed to alloc ctx for sqc and cqc!\n"); return -ENOMEM; @@ -3136,7 +2165,7 @@ static int qm_drain_qp(struct hisi_qp *qp) usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX); } - qm_ctx_free(qm, size, addr, &dma_addr); + hisi_qm_ctx_free(qm, size, addr, &dma_addr); return ret; } @@ -3721,17 +2750,6 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state) writel(state, qm->io_base + QM_VF_STATE); } -static void qm_last_regs_uninit(struct hisi_qm *qm) -{ - struct qm_debug *debug = &qm->debug; - - if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) - return; - - kfree(debug->qm_last_words); - debug->qm_last_words = NULL; -} - static void hisi_qm_unint_work(struct hisi_qm *qm) { destroy_workqueue(qm->wq); @@ -3762,8 +2780,6 @@ static void hisi_qm_memory_uninit(struct hisi_qm *qm) */ void hisi_qm_uninit(struct hisi_qm *qm) { - qm_last_regs_uninit(qm); - qm_cmd_uninit(qm); hisi_qm_unint_work(qm); down_write(&qm->qps_lock); @@ -4132,45 +3148,6 @@ err_unlock: } EXPORT_SYMBOL_GPL(hisi_qm_stop); -static ssize_t qm_status_read(struct file *filp, char __user *buffer, - size_t count, loff_t *pos) -{ - struct hisi_qm *qm = filp->private_data; - char buf[QM_DBG_READ_LEN]; - int val, len; - - val = atomic_read(&qm->status.flags); - len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]); - - return simple_read_from_buffer(buffer, count, pos, buf, len); -} - -static const struct file_operations qm_status_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = qm_status_read, -}; - -static int qm_debugfs_atomic64_set(void *data, u64 val) -{ - if (val) - return -EINVAL; - - atomic64_set((atomic64_t *)data, 0); - - return 0; -} - -static int qm_debugfs_atomic64_get(void *data, u64 *val) -{ - *val = atomic64_read((atomic64_t *)data); - - return 0; -} - -DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get, - qm_debugfs_atomic64_set, "%llu\n"); - static void qm_hw_error_init(struct hisi_qm *qm) { if (!qm->ops->hw_error_init) { @@ -4277,16 +3254,14 @@ static int hisi_qm_sort_devices(int node, struct list_head *head, struct hisi_qm *qm; struct list_head *n; struct device *dev; - int dev_node = 0; + int dev_node; list_for_each_entry(qm, &qm_list->list, list) { dev = &qm->pdev->dev; - if (IS_ENABLED(CONFIG_NUMA)) { - dev_node = dev_to_node(dev); - if (dev_node < 0) - dev_node = 0; - } + dev_node = dev_to_node(dev); + if (dev_node < 0) + dev_node = 0; res = kzalloc(sizeof(*res), GFP_KERNEL); if (!res) @@ -4592,49 +3567,36 @@ err_put_dfx_access: return ret; } -static ssize_t qm_qos_value_init(const char *buf, unsigned long *val) -{ - int buflen = strlen(buf); - int ret, i; - - for (i = 0; i < buflen; i++) { - if (!isdigit(buf[i])) - return -EINVAL; - } - - ret = sscanf(buf, "%lu", val); - if (ret != QM_QOS_VAL_NUM) - return -EINVAL; - - return 0; -} - static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf, unsigned long *val, unsigned int *fun_index) { + struct bus_type *bus_type = qm->pdev->dev.bus; char tbuf_bdf[QM_DBG_READ_LEN] = {0}; - char val_buf[QM_QOS_VAL_MAX_LEN] = {0}; - u32 tmp1, device, function; - int ret, bus; + char val_buf[QM_DBG_READ_LEN] = {0}; + struct pci_dev *pdev; + struct device *dev; + int ret; ret = sscanf(buf, "%s %s", tbuf_bdf, val_buf); if (ret != QM_QOS_PARAM_NUM) return -EINVAL; - ret = qm_qos_value_init(val_buf, val); + ret = kstrtoul(val_buf, 10, val); if (ret || *val == 0 || *val > QM_QOS_MAX_VAL) { pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n"); return -EINVAL; } - ret = sscanf(tbuf_bdf, "%u:%x:%u.%u", &tmp1, &bus, &device, &function); - if (ret != QM_QOS_BDF_PARAM_NUM) { - pci_err(qm->pdev, "input pci bdf value is error!\n"); - return -EINVAL; + dev = bus_find_device_by_name(bus_type, NULL, tbuf_bdf); + if (!dev) { + pci_err(qm->pdev, "input pci bdf number is error!\n"); + return -ENODEV; } - *fun_index = PCI_DEVFN(device, function); + pdev = container_of(dev, struct pci_dev, dev); + + *fun_index = pdev->devfn; return 0; } @@ -4648,9 +3610,6 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf, unsigned long val; int len, ret; - if (qm->fun_type == QM_HW_VF) - return -EINVAL; - if (*pos != 0) return 0; @@ -4709,7 +3668,7 @@ static const struct file_operations qm_algqos_fops = { * * Create function qos debugfs files, VF ping PF to get function qos. */ -static void hisi_qm_set_algqos_init(struct hisi_qm *qm) +void hisi_qm_set_algqos_init(struct hisi_qm *qm) { if (qm->fun_type == QM_HW_PF) debugfs_create_file("alg_qos", 0644, qm->debug.debug_root, @@ -4719,88 +3678,6 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm) qm, &qm_algqos_fops); } -/** - * hisi_qm_debug_init() - Initialize qm related debugfs files. - * @qm: The qm for which we want to add debugfs files. - * - * Create qm related debugfs files. - */ -void hisi_qm_debug_init(struct hisi_qm *qm) -{ - struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs; - struct qm_dfx *dfx = &qm->debug.dfx; - struct dentry *qm_d; - void *data; - int i; - - qm_d = debugfs_create_dir("qm", qm->debug.debug_root); - qm->debug.qm_d = qm_d; - - /* only show this in PF */ - if (qm->fun_type == QM_HW_PF) { - qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM); - for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++) - qm_create_debugfs_file(qm, qm->debug.qm_d, i); - } - - if (qm_regs) - debugfs_create_file("diff_regs", 0444, qm->debug.qm_d, - qm, &qm_diff_regs_fops); - - debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops); - - debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops); - - debugfs_create_file("status", 0444, qm->debug.qm_d, qm, - &qm_status_fops); - for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) { - data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset); - debugfs_create_file(qm_dfx_files[i].name, - 0644, - qm_d, - data, - &qm_atomic64_ops); - } - - if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) - hisi_qm_set_algqos_init(qm); -} -EXPORT_SYMBOL_GPL(hisi_qm_debug_init); - -/** - * hisi_qm_debug_regs_clear() - clear qm debug related registers. - * @qm: The qm for which we want to clear its debug registers. - */ -void hisi_qm_debug_regs_clear(struct hisi_qm *qm) -{ - const struct debugfs_reg32 *regs; - int i; - - /* clear current_qm */ - writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF); - writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF); - - /* clear current_q */ - writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN); - writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN); - - /* - * these registers are reading and clearing, so clear them after - * reading them. - */ - writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE); - - regs = qm_dfx_regs; - for (i = 0; i < CNT_CYC_REGS_NUM; i++) { - readl(qm->io_base + regs->offset); - regs++; - } - - /* clear clear_enable */ - writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE); -} -EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear); - static void hisi_qm_init_vf_qos(struct hisi_qm *qm, int total_func) { int i; @@ -5439,24 +4316,6 @@ static int qm_controller_reset_done(struct hisi_qm *qm) return 0; } -static void qm_show_last_dfx_regs(struct hisi_qm *qm) -{ - struct qm_debug *debug = &qm->debug; - struct pci_dev *pdev = qm->pdev; - u32 val; - int i; - - if (qm->fun_type == QM_HW_VF || !debug->qm_last_words) - return; - - for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) { - val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset); - if (debug->qm_last_words[i] != val) - pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n", - qm_dfx_regs[i].name, debug->qm_last_words[i], val); - } -} - static int qm_controller_reset(struct hisi_qm *qm) { struct pci_dev *pdev = qm->pdev; @@ -5472,7 +4331,7 @@ static int qm_controller_reset(struct hisi_qm *qm) return ret; } - qm_show_last_dfx_regs(qm); + hisi_qm_show_last_dfx_regs(qm); if (qm->err_ini->show_last_dfx_regs) qm->err_ini->show_last_dfx_regs(qm); @@ -5725,6 +4584,7 @@ static void qm_pf_reset_vf_done(struct hisi_qm *qm) cmd = QM_VF_START_FAIL; } + qm_cmd_init(qm); ret = qm_ping_pf(qm, cmd); if (ret) dev_warn(&pdev->dev, "PF responds timeout in reset done!\n"); @@ -5786,7 +4646,6 @@ static void qm_pf_reset_vf_process(struct hisi_qm *qm, goto err_get_status; qm_pf_reset_vf_done(qm); - qm_cmd_init(qm); dev_info(dev, "device reset done.\n"); @@ -6359,26 +5218,6 @@ err_destroy_idr: return ret; } -static void qm_last_regs_init(struct hisi_qm *qm) -{ - int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs); - struct qm_debug *debug = &qm->debug; - int i; - - if (qm->fun_type == QM_HW_VF) - return; - - debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), - GFP_KERNEL); - if (!debug->qm_last_words) - return; - - for (i = 0; i < dfx_regs_num; i++) { - debug->qm_last_words[i] = readl_relaxed(qm->io_base + - qm_dfx_regs[i].offset); - } -} - /** * hisi_qm_init() - Initialize configures about qm. * @qm: The qm needing init. @@ -6427,8 +5266,6 @@ int hisi_qm_init(struct hisi_qm *qm) qm_cmd_init(qm); atomic_set(&qm->status.flags, QM_INIT); - qm_last_regs_init(qm); - return 0; err_free_qm_memory: @@ -6631,8 +5468,14 @@ int hisi_qm_resume(struct device *dev) } ret = hisi_qm_start(qm); - if (ret) - pci_err(pdev, "failed to start qm(%d)\n", ret); + if (ret) { + if (qm_check_dev_error(qm)) { + pci_info(pdev, "failed to start qm due to device error, device will be reset!\n"); + return 0; + } + + pci_err(pdev, "failed to start qm(%d)!\n", ret); + } return ret; } diff --git a/drivers/crypto/hisilicon/qm_common.h b/drivers/crypto/hisilicon/qm_common.h new file mode 100644 index 0000000000000000000000000000000000000000..1406a422d4551714b6e9453616b1e0e68e7b8449 --- /dev/null +++ b/drivers/crypto/hisilicon/qm_common.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022 HiSilicon Limited. */ +#ifndef QM_COMMON_H +#define QM_COMMON_H + +#define QM_DBG_READ_LEN 256 +#define QM_RESETTING 2 + +struct qm_cqe { + __le32 rsvd0; + __le16 cmd_id; + __le16 rsvd1; + __le16 sq_head; + __le16 sq_num; + __le16 rsvd2; + __le16 w7; +}; + +struct qm_eqe { + __le32 dw0; +}; + +struct qm_aeqe { + __le32 dw0; +}; + +struct qm_sqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le16 w8; + __le16 rsvd0; + __le16 pasid; + __le16 w11; + __le16 cq_num; + __le16 w13; + __le32 rsvd1; +}; + +struct qm_cqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le16 w8; + __le16 rsvd0; + __le16 pasid; + __le16 w11; + __le32 dw6; + __le32 rsvd1; +}; + +struct qm_eqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le32 rsvd[2]; + __le32 dw6; +}; + +struct qm_aeqc { + __le16 head; + __le16 tail; + __le32 base_l; + __le32 base_h; + __le32 dw3; + __le32 rsvd[2]; + __le32 dw6; +}; + +static const char * const qm_s[] = { + "init", "start", "close", "stop", +}; + +void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size, + dma_addr_t *dma_addr); +void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size, + const void *ctx_addr, dma_addr_t *dma_addr); +void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm); +void hisi_qm_set_algqos_init(struct hisi_qm *qm); + +#endif diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c index 84ae8ddd1a131b658594ff70d3c2043ce78ac8cb..f5bfc9755a4a3eaec843589277584afcb6be8a4f 100644 --- a/drivers/crypto/hisilicon/sec2/sec_crypto.c +++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c @@ -283,7 +283,6 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req) spin_lock_bh(&qp_ctx->req_lock); ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe); - if (ctx->fake_req_limit <= atomic_read(&qp_ctx->qp->qp_status.used) && !ret) { list_add_tail(&req->backlog_head, &qp_ctx->backlog); @@ -2009,7 +2008,7 @@ static int sec_aead_sha512_ctx_init(struct crypto_aead *tfm) return sec_aead_ctx_init(tfm, "sha512"); } -static int sec_skcipher_cryptlen_ckeck(struct sec_ctx *ctx, +static int sec_skcipher_cryptlen_check(struct sec_ctx *ctx, struct sec_req *sreq) { u32 cryptlen = sreq->c_req.sk_req->cryptlen; @@ -2071,7 +2070,7 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq) } return 0; } else if (c_alg == SEC_CALG_AES || c_alg == SEC_CALG_SM4) { - return sec_skcipher_cryptlen_ckeck(ctx, sreq); + return sec_skcipher_cryptlen_check(ctx, sreq); } dev_err(dev, "skcipher algorithm error!\n"); diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c index 3705412bac5f19a1cf11cbbffa3a8b2efa647257..93572c0d4faa33b8f6065ff94eee6e23038fd29c 100644 --- a/drivers/crypto/hisilicon/sec2/sec_main.c +++ b/drivers/crypto/hisilicon/sec2/sec_main.c @@ -55,7 +55,7 @@ #define SEC_CONTROL_REG 0x301200 #define SEC_DYNAMIC_GATE_REG 0x30121c #define SEC_CORE_AUTO_GATE 0x30212c -#define SEC_DYNAMIC_GATE_EN 0x7bff +#define SEC_DYNAMIC_GATE_EN 0x7fff #define SEC_CORE_AUTO_GATE_EN GENMASK(3, 0) #define SEC_CLK_GATE_ENABLE BIT(3) #define SEC_CLK_GATE_DISABLE (~BIT(3)) @@ -427,7 +427,6 @@ static void sec_set_endian(struct hisi_qm *qm) if (!IS_ENABLED(CONFIG_64BIT)) reg |= BIT(1); - if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN)) reg |= BIT(0); @@ -899,8 +898,7 @@ static int sec_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN; - ret = hisi_qm_diff_regs_init(qm, sec_diff_regs, - ARRAY_SIZE(sec_diff_regs)); + ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs)); if (ret) { dev_warn(dev, "Failed to init SEC diff regs!\n"); goto debugfs_remove; @@ -915,7 +913,7 @@ static int sec_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); debugfs_remove: debugfs_remove_recursive(sec_debugfs_root); return ret; @@ -923,7 +921,7 @@ debugfs_remove: static void sec_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs)); debugfs_remove_recursive(qm->debug.debug_root); } diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c index c863435e8c75aadc5c57c2d98900b251b6f99fe1..1549bec3aea59334a8202206ad595aea5166764c 100644 --- a/drivers/crypto/hisilicon/zip/zip_main.c +++ b/drivers/crypto/hisilicon/zip/zip_main.c @@ -849,8 +849,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET; qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN; qm->debug.debug_root = dev_d; - ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs, - ARRAY_SIZE(hzip_diff_regs)); + ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs)); if (ret) { dev_warn(dev, "Failed to init ZIP diff regs!\n"); goto debugfs_remove; @@ -869,7 +868,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm) return 0; failed_to_create: - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); debugfs_remove: debugfs_remove_recursive(hzip_debugfs_root); return ret; @@ -895,7 +894,7 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm) static void hisi_zip_debugfs_exit(struct hisi_qm *qm) { - hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); + hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs)); debugfs_remove_recursive(qm->debug.debug_root); diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index d8e82d69745d8c95af1101c652a524e173edf540..9629e98bd68b7096af551610e63ca4fa2f139022 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -358,12 +358,16 @@ static int img_hash_dma_init(struct img_hash_dev *hdev) static void img_hash_dma_task(unsigned long d) { struct img_hash_dev *hdev = (struct img_hash_dev *)d; - struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req); + struct img_hash_request_ctx *ctx; u8 *addr; size_t nbytes, bleft, wsend, len, tbc; struct scatterlist tsg; - if (!hdev->req || !ctx->sg) + if (!hdev->req) + return; + + ctx = ahash_request_ctx(hdev->req); + if (!ctx->sg) return; addr = sg_virt(ctx->sg); diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c index ad0d8c4a71ac1b5801672071c60173998e6bdd37..ae6110376e21c690a2e0f065915585ab3a7a66c1 100644 --- a/drivers/crypto/inside-secure/safexcel.c +++ b/drivers/crypto/inside-secure/safexcel.c @@ -316,14 +316,20 @@ static void eip197_init_firmware(struct safexcel_crypto_priv *priv) static int eip197_write_firmware(struct safexcel_crypto_priv *priv, const struct firmware *fw) { - const __be32 *data = (const __be32 *)fw->data; + u32 val; int i; /* Write the firmware */ - for (i = 0; i < fw->size / sizeof(u32); i++) - writel(be32_to_cpu(data[i]), + for (i = 0; i < fw->size / sizeof(u32); i++) { + if (priv->data->fw_little_endian) + val = le32_to_cpu(((const __le32 *)fw->data)[i]); + else + val = be32_to_cpu(((const __be32 *)fw->data)[i]); + + writel(val, priv->base + EIP197_CLASSIFICATION_RAMS + - i * sizeof(__be32)); + i * sizeof(val)); + } /* Exclude final 2 NOPs from size */ return i - EIP197_FW_TERMINAL_NOPS; @@ -410,11 +416,13 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv) int i, j, ret = 0, pe; int ipuesz, ifppsz, minifw = 0; - if (priv->version == EIP197D_MRVL) + if (priv->data->version == EIP197D_MRVL) dir = "eip197d"; - else if (priv->version == EIP197B_MRVL || - priv->version == EIP197_DEVBRD) + else if (priv->data->version == EIP197B_MRVL || + priv->data->version == EIP197_DEVBRD) dir = "eip197b"; + else if (priv->data->version == EIP197C_MXL) + dir = "eip197c"; else return -ENODEV; @@ -423,7 +431,7 @@ retry_fw: snprintf(fw_path, 37, "inside-secure/%s/%s", dir, fw_name[i]); ret = firmware_request_nowarn(&fw[i], fw_path, priv->dev); if (ret) { - if (minifw || priv->version != EIP197B_MRVL) + if (minifw || priv->data->version != EIP197B_MRVL) goto release_fw; /* Fallback to the old firmware location for the @@ -1597,7 +1605,7 @@ static int safexcel_probe_generic(void *pdev, safexcel_configure(priv); - if (IS_ENABLED(CONFIG_PCI) && priv->version == EIP197_DEVBRD) { + if (IS_ENABLED(CONFIG_PCI) && priv->data->version == EIP197_DEVBRD) { /* * Request MSI vectors for global + 1 per ring - * or just 1 for older dev images @@ -1731,7 +1739,7 @@ static int safexcel_probe(struct platform_device *pdev) return -ENOMEM; priv->dev = dev; - priv->version = (enum safexcel_eip_version)of_device_get_match_data(dev); + priv->data = (struct safexcel_priv_data *)of_device_get_match_data(dev); platform_set_drvdata(pdev, priv); @@ -1806,27 +1814,52 @@ static int safexcel_remove(struct platform_device *pdev) return 0; } +static const struct safexcel_priv_data eip97ies_mrvl_data = { + .version = EIP97IES_MRVL, +}; + +static const struct safexcel_priv_data eip197b_mrvl_data = { + .version = EIP197B_MRVL, +}; + +static const struct safexcel_priv_data eip197d_mrvl_data = { + .version = EIP197D_MRVL, +}; + +static const struct safexcel_priv_data eip197_devbrd_data = { + .version = EIP197_DEVBRD, +}; + +static const struct safexcel_priv_data eip197c_mxl_data = { + .version = EIP197C_MXL, + .fw_little_endian = true, +}; + static const struct of_device_id safexcel_of_match_table[] = { { .compatible = "inside-secure,safexcel-eip97ies", - .data = (void *)EIP97IES_MRVL, + .data = &eip97ies_mrvl_data, }, { .compatible = "inside-secure,safexcel-eip197b", - .data = (void *)EIP197B_MRVL, + .data = &eip197b_mrvl_data, }, { .compatible = "inside-secure,safexcel-eip197d", - .data = (void *)EIP197D_MRVL, + .data = &eip197d_mrvl_data, + }, + { + .compatible = "inside-secure,safexcel-eip197c-mxl", + .data = &eip197c_mxl_data, }, /* For backward compatibility and intended for generic use */ { .compatible = "inside-secure,safexcel-eip97", - .data = (void *)EIP97IES_MRVL, + .data = &eip97ies_mrvl_data, }, { .compatible = "inside-secure,safexcel-eip197", - .data = (void *)EIP197B_MRVL, + .data = &eip197b_mrvl_data, }, {}, }; @@ -1862,7 +1895,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev, return -ENOMEM; priv->dev = dev; - priv->version = (enum safexcel_eip_version)ent->driver_data; + priv->data = (struct safexcel_priv_data *)ent->driver_data; pci_set_drvdata(pdev, priv); @@ -1881,7 +1914,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev, } priv->base = pcim_iomap_table(pdev)[0]; - if (priv->version == EIP197_DEVBRD) { + if (priv->data->version == EIP197_DEVBRD) { dev_dbg(dev, "Device identified as FPGA based development board - applying HW reset\n"); rc = pcim_iomap_regions(pdev, 4, "crypto_safexcel"); @@ -1949,7 +1982,7 @@ static const struct pci_device_id safexcel_pci_ids[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_XILINX, 0x9038, 0x16ae, 0xc522), - .driver_data = EIP197_DEVBRD, + .driver_data = (kernel_ulong_t)&eip197_devbrd_data, }, {}, }; diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h index 797ff91512e0d03f0b9a79b124a85c9d6fd11945..6c2fc662f64ffa727f4d2da1a4c74945d7932a54 100644 --- a/drivers/crypto/inside-secure/safexcel.h +++ b/drivers/crypto/inside-secure/safexcel.h @@ -730,7 +730,13 @@ enum safexcel_eip_version { EIP97IES_MRVL, EIP197B_MRVL, EIP197D_MRVL, - EIP197_DEVBRD + EIP197_DEVBRD, + EIP197C_MXL, +}; + +struct safexcel_priv_data { + enum safexcel_eip_version version; + bool fw_little_endian; }; /* Priority we use for advertising our algorithms */ @@ -815,7 +821,7 @@ struct safexcel_crypto_priv { struct clk *reg_clk; struct safexcel_config config; - enum safexcel_eip_version version; + struct safexcel_priv_data *data; struct safexcel_register_offsets offsets; struct safexcel_hwconfig hwconfig; u32 flags; diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c index 103fc551d2af902262ef9fef45a5c95e87b92a21..ca46328472d4bc0041ba7d03f3cf10caa6f33bba 100644 --- a/drivers/crypto/inside-secure/safexcel_hash.c +++ b/drivers/crypto/inside-secure/safexcel_hash.c @@ -231,7 +231,7 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, struct safexcel_result_desc *rdesc; struct ahash_request *areq = ahash_request_cast(async); struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq); - struct safexcel_ahash_req *sreq = ahash_request_ctx(areq); + struct safexcel_ahash_req *sreq = ahash_request_ctx_dma(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(ahash); u64 cache_len; @@ -312,7 +312,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring, int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); struct safexcel_crypto_priv *priv = ctx->base.priv; struct safexcel_command_desc *cdesc, *first_cdesc = NULL; @@ -569,7 +569,7 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring, bool *should_complete, int *ret) { struct ahash_request *areq = ahash_request_cast(async); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); int err; BUG_ON(!(priv->flags & EIP197_TRC_CACHE) && req->needs_inv); @@ -608,7 +608,7 @@ static int safexcel_ahash_send(struct crypto_async_request *async, int ring, int *commands, int *results) { struct ahash_request *areq = ahash_request_cast(async); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); int ret; if (req->needs_inv) @@ -624,7 +624,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm); struct safexcel_crypto_priv *priv = ctx->base.priv; EIP197_REQUEST_ON_STACK(req, ahash, EIP197_AHASH_REQ_SIZE); - struct safexcel_ahash_req *rctx = ahash_request_ctx(req); + struct safexcel_ahash_req *rctx = ahash_request_ctx_dma(req); struct safexcel_inv_result result = {}; int ring = ctx->base.ring; @@ -663,7 +663,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm) */ static int safexcel_ahash_cache(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); u64 cache_len; /* cache_len: everything accepted by the driver but not sent yet, @@ -689,7 +689,7 @@ static int safexcel_ahash_cache(struct ahash_request *areq) static int safexcel_ahash_enqueue(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_crypto_priv *priv = ctx->base.priv; int ret, ring; @@ -741,7 +741,7 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq) static int safexcel_ahash_update(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); int ret; /* If the request is 0 length, do nothing */ @@ -766,7 +766,7 @@ static int safexcel_ahash_update(struct ahash_request *areq) static int safexcel_ahash_final(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); req->finish = true; @@ -870,7 +870,7 @@ static int safexcel_ahash_final(struct ahash_request *areq) static int safexcel_ahash_finup(struct ahash_request *areq) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); req->finish = true; @@ -880,7 +880,7 @@ static int safexcel_ahash_finup(struct ahash_request *areq) static int safexcel_ahash_export(struct ahash_request *areq, void *out) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); struct safexcel_ahash_export_state *export = out; export->len = req->len; @@ -896,7 +896,7 @@ static int safexcel_ahash_export(struct ahash_request *areq, void *out) static int safexcel_ahash_import(struct ahash_request *areq, const void *in) { - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); const struct safexcel_ahash_export_state *export = in; int ret; @@ -927,15 +927,15 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm) ctx->base.handle_result = safexcel_handle_result; ctx->fb_do_setkey = false; - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct safexcel_ahash_req)); + crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm), + sizeof(struct safexcel_ahash_req)); return 0; } static int safexcel_sha1_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1012,7 +1012,7 @@ struct safexcel_alg_template safexcel_alg_sha1 = { static int safexcel_hmac_sha1_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1124,7 +1124,7 @@ static int safexcel_hmac_init_iv(struct ahash_request *areq, if (ret) return ret; - req = ahash_request_ctx(areq); + req = ahash_request_ctx_dma(areq); req->hmac = true; req->last_req = true; @@ -1264,7 +1264,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha1 = { static int safexcel_sha256_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1321,7 +1321,7 @@ struct safexcel_alg_template safexcel_alg_sha256 = { static int safexcel_sha224_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1385,7 +1385,7 @@ static int safexcel_hmac_sha224_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha224_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1457,7 +1457,7 @@ static int safexcel_hmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha256_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1522,7 +1522,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha256 = { static int safexcel_sha512_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1579,7 +1579,7 @@ struct safexcel_alg_template safexcel_alg_sha512 = { static int safexcel_sha384_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1643,7 +1643,7 @@ static int safexcel_hmac_sha512_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha512_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1715,7 +1715,7 @@ static int safexcel_hmac_sha384_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sha384_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1780,7 +1780,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha384 = { static int safexcel_md5_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1837,7 +1837,7 @@ struct safexcel_alg_template safexcel_alg_md5 = { static int safexcel_hmac_md5_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1920,7 +1920,7 @@ static int safexcel_crc32_cra_init(struct crypto_tfm *tfm) static int safexcel_crc32_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -1992,7 +1992,7 @@ struct safexcel_alg_template safexcel_alg_crc32 = { static int safexcel_cbcmac_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2252,7 +2252,7 @@ struct safexcel_alg_template safexcel_alg_cmac = { static int safexcel_sm3_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2316,7 +2316,7 @@ static int safexcel_hmac_sm3_setkey(struct crypto_ahash *tfm, const u8 *key, static int safexcel_hmac_sm3_init(struct ahash_request *areq) { struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq)); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2382,7 +2382,7 @@ static int safexcel_sha3_224_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2400,7 +2400,7 @@ static int safexcel_sha3_fbcheck(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); int ret = 0; if (ctx->do_fallback) { @@ -2437,7 +2437,7 @@ static int safexcel_sha3_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_update(subreq); @@ -2447,7 +2447,7 @@ static int safexcel_sha3_final(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_final(subreq); @@ -2457,7 +2457,7 @@ static int safexcel_sha3_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback |= !req->nbytes; if (ctx->do_fallback) @@ -2472,7 +2472,7 @@ static int safexcel_sha3_digest_fallback(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; ctx->fb_init_done = false; @@ -2492,7 +2492,7 @@ static int safexcel_sha3_export(struct ahash_request *req, void *out) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_export(subreq, out); @@ -2502,7 +2502,7 @@ static int safexcel_sha3_import(struct ahash_request *req, const void *in) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct ahash_request *subreq = ahash_request_ctx(req); + struct ahash_request *subreq = ahash_request_ctx_dma(req); ctx->do_fallback = true; return safexcel_sha3_fbcheck(req) ?: crypto_ahash_import(subreq, in); @@ -2526,9 +2526,10 @@ static int safexcel_sha3_cra_init(struct crypto_tfm *tfm) /* Update statesize from fallback algorithm! */ crypto_hash_alg_common(ahash)->statesize = crypto_ahash_statesize(ctx->fback); - crypto_ahash_set_reqsize(ahash, max(sizeof(struct safexcel_ahash_req), - sizeof(struct ahash_request) + - crypto_ahash_reqsize(ctx->fback))); + crypto_ahash_set_reqsize_dma( + ahash, max(sizeof(struct safexcel_ahash_req), + sizeof(struct ahash_request) + + crypto_ahash_reqsize(ctx->fback))); return 0; } @@ -2575,7 +2576,7 @@ static int safexcel_sha3_256_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2633,7 +2634,7 @@ static int safexcel_sha3_384_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2691,7 +2692,7 @@ static int safexcel_sha3_512_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2841,7 +2842,7 @@ static int safexcel_hmac_sha3_224_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2912,7 +2913,7 @@ static int safexcel_hmac_sha3_256_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -2983,7 +2984,7 @@ static int safexcel_hmac_sha3_384_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); @@ -3054,7 +3055,7 @@ static int safexcel_hmac_sha3_512_init(struct ahash_request *areq) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm); - struct safexcel_ahash_req *req = ahash_request_ctx(areq); + struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq); memset(req, 0, sizeof(*req)); diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index d39a386b31ac7752ffb17b89efac49ea76063523..984b3cc0237ca5124816dfc98587f60bd4603989 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -420,7 +420,7 @@ static void one_packet(dma_addr_t phys) break; case CTL_FLAG_GEN_REVAES: ctx = crypto_tfm_ctx(crypt->data.tfm); - *(u32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR); + *(__be32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR); if (atomic_dec_and_test(&ctx->configuring)) complete(&ctx->completion); break; @@ -720,7 +720,7 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target, crypt->init_len = init_len; crypt->ctl_flags |= CTL_FLAG_GEN_ICV; - buf->next = 0; + buf->next = NULL; buf->buf_len = HMAC_PAD_BLOCKLEN; buf->pkt_len = 0; buf->phys_addr = pad_phys; @@ -751,7 +751,7 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize #ifndef __ARMEB__ cfgword ^= 0xAA000000; /* change the "byte swap" flags */ #endif - *(u32 *)cinfo = cpu_to_be32(cfgword); + *(__be32 *)cinfo = cpu_to_be32(cfgword); cinfo += sizeof(cfgword); /* write ICV to cryptinfo */ @@ -788,7 +788,7 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm) if (!crypt) return -EAGAIN; - *(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR); + *(__be32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR); crypt->data.tfm = tfm; crypt->crypt_offs = 0; @@ -846,7 +846,7 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt, const u8 *key, return err; } /* write cfg word to cryptinfo */ - *(u32 *)cinfo = cpu_to_be32(cipher_cfg); + *(__be32 *)cinfo = cpu_to_be32(cipher_cfg); cinfo += sizeof(cipher_cfg); /* write cipher key to cryptinfo */ diff --git a/drivers/crypto/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/keembay/keembay-ocs-hcu-core.c index 0379dbf32a4c46f8b6189cd14cab8358841c504d..d4bcbed1f5466da6728ae0132f78d4d1e400598e 100644 --- a/drivers/crypto/keembay/keembay-ocs-hcu-core.c +++ b/drivers/crypto/keembay/keembay-ocs-hcu-core.c @@ -226,7 +226,7 @@ static void kmb_ocs_hcu_dma_cleanup(struct ahash_request *req, */ static int kmb_ocs_dma_prepare(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct device *dev = rctx->hcu_dev->dev; unsigned int remainder = 0; unsigned int total; @@ -356,7 +356,7 @@ cleanup: static void kmb_ocs_hcu_secure_cleanup(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); /* Clear buffer of any data. */ memzero_explicit(rctx->buffer, sizeof(rctx->buffer)); @@ -374,7 +374,7 @@ static int kmb_ocs_hcu_handle_queue(struct ahash_request *req) static int prepare_ipad(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); int i; @@ -414,7 +414,7 @@ static int kmb_ocs_hcu_do_one_request(struct crypto_engine *engine, void *areq) base); struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct ocs_hcu_ctx *tctx = crypto_ahash_ctx(tfm); int rc; int i; @@ -561,7 +561,7 @@ error: static int kmb_ocs_hcu_init(struct ahash_request *req) { struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req); - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); @@ -614,7 +614,7 @@ static int kmb_ocs_hcu_init(struct ahash_request *req) static int kmb_ocs_hcu_update(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); int rc; if (!req->nbytes) @@ -650,7 +650,7 @@ static int kmb_ocs_hcu_update(struct ahash_request *req) /* Common logic for kmb_ocs_hcu_final() and kmb_ocs_hcu_finup(). */ static int kmb_ocs_hcu_fin_common(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm); int rc; @@ -687,7 +687,7 @@ static int kmb_ocs_hcu_fin_common(struct ahash_request *req) static int kmb_ocs_hcu_final(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); rctx->sg_data_total = 0; rctx->sg_data_offset = 0; @@ -698,7 +698,7 @@ static int kmb_ocs_hcu_final(struct ahash_request *req) static int kmb_ocs_hcu_finup(struct ahash_request *req) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); rctx->sg_data_total = req->nbytes; rctx->sg_data_offset = 0; @@ -726,7 +726,7 @@ static int kmb_ocs_hcu_digest(struct ahash_request *req) static int kmb_ocs_hcu_export(struct ahash_request *req, void *out) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); /* Intermediate data is always stored and applied per request. */ memcpy(out, rctx, sizeof(*rctx)); @@ -736,7 +736,7 @@ static int kmb_ocs_hcu_export(struct ahash_request *req, void *out) static int kmb_ocs_hcu_import(struct ahash_request *req, const void *in) { - struct ocs_hcu_rctx *rctx = ahash_request_ctx(req); + struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req); /* Intermediate data is always stored and applied per request. */ memcpy(rctx, in, sizeof(*rctx)); @@ -822,8 +822,8 @@ err_free_ahash: /* Set request size and initialize tfm context. */ static void __cra_init(struct crypto_tfm *tfm, struct ocs_hcu_ctx *ctx) { - crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), - sizeof(struct ocs_hcu_rctx)); + crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm), + sizeof(struct ocs_hcu_rctx)); /* Init context to 0. */ memzero_explicit(ctx, sizeof(*ctx)); diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h index 205eacac4a348bc995ed99468af1d8d2b19c2107..f8aedafdfdc5f06be69f617cc4dfc1510bb75aa1 100644 --- a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h +++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h @@ -534,7 +534,7 @@ union otx_cptx_vqx_misc_ena_w1s { * Word0 * reserved_20_63:44 [63:20] Reserved. * dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add - * to the CPT instruction doorbell count. Readback value is the the + * to the CPT instruction doorbell count. Readback value is the * current number of pending doorbell requests. If counter overflows * CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to * zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF], diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c index df9c2b8747e6dd0c15c1ceaad085c93a4fe31daf..c4250e5fcf8f7bad435bbc6ffbd7138eebb39750 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c +++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c @@ -345,8 +345,7 @@ static void release_tar_archive(struct tar_arch_info_t *tar_arch) kfree(curr); } - if (tar_arch->fw) - release_firmware(tar_arch->fw); + release_firmware(tar_arch->fw); kfree(tar_arch); } diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c index 01c48ddc4eeb42d625abaac0604bb3ed8469a97c..80ba77c793a7cb22e1c1da8769d32e11b491b527 100644 --- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c @@ -103,7 +103,7 @@ static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req) req = container_of(cpt_req->areq, struct aead_request, base); tfm = crypto_aead_reqtfm(req); - rctx = aead_request_ctx(req); + rctx = aead_request_ctx_dma(req); if (memcmp(rctx->fctx.hmac.s.hmac_calc, rctx->fctx.hmac.s.hmac_recv, crypto_aead_authsize(tfm)) != 0) @@ -155,7 +155,7 @@ static void output_iv_copyback(struct crypto_async_request *areq) ctx = crypto_skcipher_ctx(stfm); if (ctx->cipher_type == OTX_CPT_AES_CBC || ctx->cipher_type == OTX_CPT_DES3_CBC) { - rctx = skcipher_request_ctx(sreq); + rctx = skcipher_request_ctx_dma(sreq); req_info = &rctx->cpt_req; ivsize = crypto_skcipher_ivsize(stfm); start = sreq->cryptlen - ivsize; @@ -233,7 +233,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, u32 *argcnt) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm); struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm); @@ -303,7 +303,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc, static inline u32 create_input_list(struct skcipher_request *req, u32 enc, u32 enc_iv_len) { - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; int ret; @@ -321,7 +321,7 @@ static inline u32 create_input_list(struct skcipher_request *req, u32 enc, static inline void create_output_list(struct skcipher_request *req, u32 enc_iv_len) { - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -340,7 +340,7 @@ static inline void create_output_list(struct skcipher_request *req, static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 enc_iv_len = crypto_skcipher_ivsize(stfm); struct pci_dev *pdev; @@ -501,15 +501,16 @@ static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm) * allocated since the cryptd daemon uses * this memory for request_ctx information */ - crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) + - sizeof(struct skcipher_request)); + crypto_skcipher_set_reqsize_dma( + tfm, sizeof(struct otx_cpt_req_ctx) + + sizeof(struct skcipher_request)); return 0; } static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); ctx->cipher_type = cipher_type; ctx->mac_type = mac_type; @@ -551,7 +552,7 @@ static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type) } } - crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct otx_cpt_req_ctx)); return 0; } @@ -603,7 +604,7 @@ static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) static void otx_cpt_aead_exit(struct crypto_aead *tfm) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); kfree(ctx->ipad); kfree(ctx->opad); @@ -619,7 +620,7 @@ static void otx_cpt_aead_exit(struct crypto_aead *tfm) static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); switch (ctx->mac_type) { case OTX_CPT_SHA1: @@ -739,7 +740,7 @@ static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) static int aead_hmac_init(struct crypto_aead *cipher) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); int state_size = crypto_shash_statesize(ctx->hashalg); int ds = crypto_shash_digestsize(ctx->hashalg); int bs = crypto_shash_blocksize(ctx->hashalg); @@ -837,7 +838,7 @@ static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; int enckeylen = 0, authkeylen = 0; struct rtattr *rta = (void *)key; @@ -896,7 +897,7 @@ static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; struct rtattr *rta = (void *)key; int enckeylen = 0; @@ -932,7 +933,7 @@ static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); /* * For aes gcm we expect to get encryption key (16, 24, 32 bytes) @@ -965,9 +966,9 @@ static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, u32 *argcnt) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct otx_cpt_fc_ctx *fctx = &rctx->fctx; int mac_len = crypto_aead_authsize(tfm); @@ -1050,9 +1051,9 @@ static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc, static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, u32 enc) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx_cpt_req_info *req_info = &rctx->cpt_req; req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER; @@ -1076,7 +1077,7 @@ static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen = req->cryptlen + req->assoclen; u32 status, argcnt = 0; @@ -1093,7 +1094,7 @@ static inline u32 create_aead_input_list(struct aead_request *req, u32 enc) static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0, outputlen = 0; @@ -1111,7 +1112,7 @@ static inline u32 create_aead_output_list(struct aead_request *req, u32 enc, static inline u32 create_aead_null_input_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen, argcnt = 0; @@ -1130,7 +1131,7 @@ static inline u32 create_aead_null_input_list(struct aead_request *req, static inline u32 create_aead_null_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct scatterlist *dst; u8 *ptr = NULL; @@ -1217,7 +1218,7 @@ error: static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) { - struct otx_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx_cpt_req_info *req_info = &rctx->cpt_req; struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct pci_dev *pdev; @@ -1409,7 +1410,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1428,7 +1429,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1447,7 +1448,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1466,7 +1467,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1485,7 +1486,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1504,7 +1505,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1523,7 +1524,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1542,7 +1543,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1561,7 +1562,7 @@ static struct aead_alg otx_cpt_aeads[] = { { .cra_driver_name = "cpt_rfc4106_gcm_aes", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, - .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c index 67530e90bbfed3f8aa8fa346b6a9f81e7661fe83..30b423605c9c1214f6d7baae6de8eb723194c06f 100644 --- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c +++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c @@ -87,7 +87,7 @@ static inline int validate_hmac_cipher_null(struct otx2_cpt_req_info *cpt_req) req = container_of(cpt_req->areq, struct aead_request, base); tfm = crypto_aead_reqtfm(req); - rctx = aead_request_ctx(req); + rctx = aead_request_ctx_dma(req); if (memcmp(rctx->fctx.hmac.s.hmac_calc, rctx->fctx.hmac.s.hmac_recv, crypto_aead_authsize(tfm)) != 0) @@ -137,7 +137,7 @@ static void output_iv_copyback(struct crypto_async_request *areq) ctx = crypto_skcipher_ctx(stfm); if (ctx->cipher_type == OTX2_CPT_AES_CBC || ctx->cipher_type == OTX2_CPT_DES3_CBC) { - rctx = skcipher_request_ctx(sreq); + rctx = skcipher_request_ctx_dma(sreq); req_info = &rctx->cpt_req; ivsize = crypto_skcipher_ivsize(stfm); start = sreq->cryptlen - ivsize; @@ -219,7 +219,7 @@ static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc, u32 *argcnt) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; @@ -288,7 +288,7 @@ static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc, static inline int create_input_list(struct skcipher_request *req, u32 enc, u32 enc_iv_len) { - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; int ret; @@ -306,7 +306,7 @@ static inline int create_input_list(struct skcipher_request *req, u32 enc, static inline void create_output_list(struct skcipher_request *req, u32 enc_iv_len) { - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0; @@ -325,7 +325,7 @@ static inline void create_output_list(struct skcipher_request *req, static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); int ret; @@ -348,7 +348,7 @@ static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc) static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc) { struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req); - struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req); struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 enc_iv_len = crypto_skcipher_ivsize(stfm); @@ -537,8 +537,9 @@ static int otx2_cpt_enc_dec_init(struct crypto_skcipher *stfm) * allocated since the cryptd daemon uses * this memory for request_ctx information */ - crypto_skcipher_set_reqsize(stfm, sizeof(struct otx2_cpt_req_ctx) + - sizeof(struct skcipher_request)); + crypto_skcipher_set_reqsize_dma( + stfm, sizeof(struct otx2_cpt_req_ctx) + + sizeof(struct skcipher_request)); return cpt_skcipher_fallback_init(ctx, alg); } @@ -572,7 +573,7 @@ static int cpt_aead_fallback_init(struct otx2_cpt_aead_ctx *ctx, static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(atfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(atfm); struct crypto_tfm *tfm = crypto_aead_tfm(atfm); struct crypto_alg *alg = tfm->__crt_alg; @@ -629,7 +630,7 @@ static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type) ctx->enc_align_len = 1; break; } - crypto_aead_set_reqsize(atfm, sizeof(struct otx2_cpt_req_ctx)); + crypto_aead_set_reqsize_dma(atfm, sizeof(struct otx2_cpt_req_ctx)); return cpt_aead_fallback_init(ctx, alg); } @@ -681,7 +682,7 @@ static int otx2_cpt_aead_gcm_aes_init(struct crypto_aead *tfm) static void otx2_cpt_aead_exit(struct crypto_aead *tfm) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); kfree(ctx->ipad); kfree(ctx->opad); @@ -698,7 +699,7 @@ static void otx2_cpt_aead_exit(struct crypto_aead *tfm) static int otx2_cpt_aead_gcm_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); if (crypto_rfc4106_check_authsize(authsize)) return -EINVAL; @@ -722,7 +723,7 @@ static int otx2_cpt_aead_set_authsize(struct crypto_aead *tfm, static int otx2_cpt_aead_null_set_authsize(struct crypto_aead *tfm, unsigned int authsize) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); ctx->is_trunc_hmac = true; tfm->authsize = authsize; @@ -794,7 +795,7 @@ static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad) static int aead_hmac_init(struct crypto_aead *cipher) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); int state_size = crypto_shash_statesize(ctx->hashalg); int ds = crypto_shash_digestsize(ctx->hashalg); int bs = crypto_shash_blocksize(ctx->hashalg); @@ -892,7 +893,7 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; int enckeylen = 0, authkeylen = 0; struct rtattr *rta = (void *)key; @@ -944,7 +945,7 @@ static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); struct crypto_authenc_key_param *param; struct rtattr *rta = (void *)key; int enckeylen = 0; @@ -979,7 +980,7 @@ static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, const unsigned char *key, unsigned int keylen) { - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher); /* * For aes gcm we expect to get encryption key (16, 24, 32 bytes) @@ -1012,9 +1013,9 @@ static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher, static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc, u32 *argcnt) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct otx2_cpt_fc_ctx *fctx = &rctx->fctx; int mac_len = crypto_aead_authsize(tfm); @@ -1103,9 +1104,9 @@ static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc, static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, u32 enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG; @@ -1127,7 +1128,7 @@ static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt, static inline int create_aead_input_list(struct aead_request *req, u32 enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen = req->cryptlen + req->assoclen; u32 status, argcnt = 0; @@ -1144,7 +1145,7 @@ static inline int create_aead_input_list(struct aead_request *req, u32 enc) static inline void create_aead_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 argcnt = 0, outputlen = 0; @@ -1160,7 +1161,7 @@ static inline void create_aead_output_list(struct aead_request *req, u32 enc, static inline void create_aead_null_input_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; u32 inputlen, argcnt = 0; @@ -1177,7 +1178,7 @@ static inline void create_aead_null_input_list(struct aead_request *req, static inline int create_aead_null_output_list(struct aead_request *req, u32 enc, u32 mac_len) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct scatterlist *dst; u8 *ptr = NULL; @@ -1257,9 +1258,9 @@ error_free: static int aead_do_fallback(struct aead_request *req, bool is_enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *aead = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(aead); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(aead); int ret; if (ctx->fbk_cipher) { @@ -1281,10 +1282,10 @@ static int aead_do_fallback(struct aead_request *req, bool is_enc) static int cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc) { - struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req); + struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req); struct otx2_cpt_req_info *req_info = &rctx->cpt_req; struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm); + struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm); struct pci_dev *pdev; int status, cpu_num; @@ -1458,7 +1459,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1477,7 +1478,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1496,7 +1497,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1515,7 +1516,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_cbc_aes", .cra_blocksize = AES_BLOCK_SIZE, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1534,7 +1535,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha1_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1553,7 +1554,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha256_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1572,7 +1573,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha384_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1591,7 +1592,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_hmac_sha512_ecb_null", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, @@ -1610,7 +1611,7 @@ static struct aead_alg otx2_cpt_aeads[] = { { .cra_driver_name = "cpt_rfc4106_gcm_aes", .cra_blocksize = 1, .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, - .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx), + .cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING, .cra_priority = 4001, .cra_alignmask = 0, .cra_module = THIS_MODULE, diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 31e24df18877f52a07b6d55c36a2b1d78da1cd2d..20d0dcd50344b3335448adb83c0ef32c36e5596c 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1229,6 +1229,7 @@ struct n2_hash_tmpl { const u8 *hash_init; u8 hw_op_hashsz; u8 digest_size; + u8 statesize; u8 block_size; u8 auth_type; u8 hmac_type; @@ -1260,6 +1261,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_MD5, .hw_op_hashsz = MD5_DIGEST_SIZE, .digest_size = MD5_DIGEST_SIZE, + .statesize = sizeof(struct md5_state), .block_size = MD5_HMAC_BLOCK_SIZE }, { .name = "sha1", .hash_zero = sha1_zero_message_hash, @@ -1268,6 +1270,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_SHA1, .hw_op_hashsz = SHA1_DIGEST_SIZE, .digest_size = SHA1_DIGEST_SIZE, + .statesize = sizeof(struct sha1_state), .block_size = SHA1_BLOCK_SIZE }, { .name = "sha256", .hash_zero = sha256_zero_message_hash, @@ -1276,6 +1279,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_HMAC_SHA256, .hw_op_hashsz = SHA256_DIGEST_SIZE, .digest_size = SHA256_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), .block_size = SHA256_BLOCK_SIZE }, { .name = "sha224", .hash_zero = sha224_zero_message_hash, @@ -1284,6 +1288,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = { .hmac_type = AUTH_TYPE_RESERVED, .hw_op_hashsz = SHA256_DIGEST_SIZE, .digest_size = SHA224_DIGEST_SIZE, + .statesize = sizeof(struct sha256_state), .block_size = SHA224_BLOCK_SIZE }, }; #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls) @@ -1424,6 +1429,7 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl) halg = &ahash->halg; halg->digestsize = tmpl->digest_size; + halg->statesize = tmpl->statesize; base = &halg->base; snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name); diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h index b66f19ac600f2a1df6c49e12624c564ce8ba9e57..7590bfb24d79bf427d9ed6a5b54874179dfa9e56 100644 --- a/drivers/crypto/nx/nx-842.h +++ b/drivers/crypto/nx/nx-842.h @@ -3,10 +3,10 @@ #ifndef __NX_842_H__ #define __NX_842_H__ +#include #include #include #include -#include #include #include #include diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 655a7f5a406a17d80ebb3103acdd0650a111fdec..cbeda59c6b1911e7cd17e7eb180770b32936fc1f 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2114,7 +2114,7 @@ static int omap_sham_probe(struct platform_device *pdev) pm_runtime_enable(dev); - err = pm_runtime_get_sync(dev); + err = pm_runtime_resume_and_get(dev); if (err < 0) { dev_err(dev, "failed to get sync: %d\n", err); goto err_pm; diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c index fda5f699ff57514362f2ced23f2afdde3a8d7474..834a705180c0254efebd28726713f835d147d93b 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -357,10 +358,11 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data) hw_data->ring_pair_reset = adf_gen4_ring_pair_reset; hw_data->enable_pm = adf_gen4_enable_pm; hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt; - hw_data->dev_config = adf_crypto_dev_config; + hw_data->dev_config = adf_gen4_dev_config; adf_gen4_init_hw_csr_ops(&hw_data->csr_ops); adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops); + adf_gen4_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h index 9d49248931f6a93a32542e027d6e5bbffc1f1586..e98428ba78e29b0956f0a891b8e1983c037111e2 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h +++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h @@ -70,6 +70,6 @@ enum icp_qat_4xxx_slice_mask { void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data); void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data); -int adf_crypto_dev_config(struct adf_accel_dev *accel_dev); +int adf_gen4_dev_config(struct adf_accel_dev *accel_dev); #endif diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c index 2f212561acc47b8c95e52c076ae7cc92980e06c0..b3a4c7b238644f6e451f114ce0eea41c964cb731 100644 --- a/drivers/crypto/qat/qat_4xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c @@ -9,6 +9,7 @@ #include #include "adf_4xxx_hw_data.h" +#include "qat_compression.h" #include "qat_crypto.h" #include "adf_transport_access_macros.h" @@ -19,6 +20,16 @@ static const struct pci_device_id adf_pci_tbl[] = { }; MODULE_DEVICE_TABLE(pci, adf_pci_tbl); +enum configs { + DEV_CFG_CY = 0, + DEV_CFG_DC, +}; + +static const char * const services_operations[] = { + ADF_CFG_CY, + ADF_CFG_DC, +}; + static void adf_cleanup_accel(struct adf_accel_dev *accel_dev) { if (accel_dev->hw_device) { @@ -53,7 +64,7 @@ static int adf_cfg_dev_init(struct adf_accel_dev *accel_dev) return 0; } -int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) +static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) { char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; int banks = GET_MAX_BANKS(accel_dev); @@ -68,14 +79,6 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) else instances = 0; - ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); - if (ret) - goto err; - - ret = adf_cfg_section_add(accel_dev, "Accelerator0"); - if (ret) - goto err; - for (i = 0; i < instances; i++) { val = i; bank = i * 2; @@ -155,10 +158,128 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev) if (ret) goto err; - set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + val = 0; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + goto err; + return 0; err: - dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n"); + return ret; +} + +static int adf_comp_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_compression(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + for (i = 0; i < instances; i++) { + val = i; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 1; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = ADF_COALESCING_DEF_TIME; + snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) + goto err; + + return 0; +err: + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n"); + return ret; +} + +int adf_gen4_dev_config(struct adf_accel_dev *accel_dev) +{ + char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0}; + int ret; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) + goto err; + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) + goto err; + + ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC, + ADF_SERVICES_ENABLED, services); + if (ret) + goto err; + + ret = sysfs_match_string(services_operations, services); + if (ret < 0) + goto err; + + switch (ret) { + case DEV_CFG_CY: + ret = adf_crypto_dev_config(accel_dev); + break; + case DEV_CFG_DC: + ret = adf_comp_dev_config(accel_dev); + break; + } + + if (ret) + goto err; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + + return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n"); return ret; } @@ -261,6 +382,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev); if (!hw_data->accel_capabilities_mask) { dev_err(&pdev->dev, "Failed to get capabilities mask.\n"); + ret = -EINVAL; goto out_err; } @@ -293,7 +415,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_disable_aer; - ret = adf_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index 50d5afa26a9bee75155345bacb39b9899015bcb2..c55c51a07677d907f6f47710eccb2b8ef24e338b 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -2,6 +2,8 @@ /* Copyright(c) 2014 - 2021 Intel Corporation */ #include #include +#include +#include #include #include #include "adf_c3xxx_hw_data.h" @@ -124,9 +126,11 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->disable_iov = adf_disable_sriov; + hw_data->dev_config = adf_gen2_dev_config; adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c3xxx(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c index 2aef0bb791dfa3f39867662c55210bd47c96ec6e..1f4fbf4562b221b7a846293199fb08608fb2911f 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c @@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable_aer; } - ret = qat_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c index a9fbe57b32ae35631a205604d3796604388a9ace..84d9486e04de6bc9438d7384af1e67d07af34232 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c @@ -2,6 +2,8 @@ /* Copyright(c) 2015 - 2021 Intel Corporation */ #include #include +#include +#include #include #include #include @@ -86,9 +88,11 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->dev_class->instances++; + hw_data->dev_config = adf_gen2_dev_config; adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c index fa18d8009f533e3bd561baee7c60bc65490a4dbc..cf4ef83e186f835a7edf85ce461869eba43f28b9 100644 --- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c @@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_dev_shutdown; - set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); - ret = adf_dev_start(accel_dev); if (ret) goto out_err_dev_stop; diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index c00386fe6587a684f56dea91da0c1260e3e7a134..b7aa19d2fa80426e01bac03684cd6ec560b61ea3 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -2,6 +2,8 @@ /* Copyright(c) 2014 - 2021 Intel Corporation */ #include #include +#include +#include #include #include #include "adf_c62x_hw_data.h" @@ -126,9 +128,11 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->reset_device = adf_reset_flr; hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer; hw_data->disable_iov = adf_disable_sriov; + hw_data->dev_config = adf_gen2_dev_config; adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c62x(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c index 56163083f16163c878f02cf1859982492426a325..4ccaf298250c3c6286195381ba8d144c140d66e8 100644 --- a/drivers/crypto/qat/qat_c62x/adf_drv.c +++ b/drivers/crypto/qat/qat_c62x/adf_drv.c @@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable_aer; } - ret = qat_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c index 0282038fca54893c93387007a0d24f83a63de912..751d7aa57fc7f04cd95c7ac42792f0286b76c4c5 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c @@ -2,6 +2,8 @@ /* Copyright(c) 2015 - 2021 Intel Corporation */ #include #include +#include +#include #include #include #include @@ -86,9 +88,11 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->dev_class->instances++; + hw_data->dev_config = adf_gen2_dev_config; adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c index 686ec752d0e9e8c316904837ff4c265668065f2b..0e642c94b92936bce3a0e92656c3b9865a3617e9 100644 --- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c +++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c @@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_dev_shutdown; - set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); - ret = adf_dev_start(accel_dev); if (ret) goto out_err_dev_stop; diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 80919cfcc29dafceac7cfd65ca7247ca5327e2a6..1fb8d50f509f82c24361a1d93e6148b8244e2c94 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -12,14 +12,20 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ adf_sysfs.o \ adf_gen2_hw_data.o \ + adf_gen2_config.o \ adf_gen4_hw_data.o \ adf_gen4_pm.o \ + adf_gen2_dc.o \ + adf_gen4_dc.o \ qat_crypto.o \ + qat_compression.o \ + qat_comp_algs.o \ qat_algs.o \ qat_asym_algs.o \ qat_algs_send.o \ qat_uclo.o \ - qat_hal.o + qat_hal.o \ + qat_bl.o intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \ diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 0a55a4f34dcfd803bbc2953dc14524c2509792ae..284f5aad3ee0bc687e6f4549517a7ace6d59c5aa 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -163,6 +163,10 @@ struct adf_pfvf_ops { u32 pfvf_offset, u8 compat_ver); }; +struct adf_dc_ops { + void (*build_deflate_ctx)(void *ctx); +}; + struct adf_hw_device_data { struct adf_hw_device_class *dev_class; u32 (*get_accel_mask)(struct adf_hw_device_data *self); @@ -202,6 +206,7 @@ struct adf_hw_device_data { int (*dev_config)(struct adf_accel_dev *accel_dev); struct adf_pfvf_ops pfvf_ops; struct adf_hw_csr_ops csr_ops; + struct adf_dc_ops dc_ops; const char *fw_name; const char *fw_mmp_name; u32 fuses; @@ -247,6 +252,7 @@ struct adf_hw_device_data { #define GET_MAX_ACCELENGINES(accel_dev) (GET_HW_DATA(accel_dev)->num_engines) #define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops) #define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops) +#define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops) #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev struct adf_admin_comms; @@ -266,13 +272,21 @@ struct adf_accel_vf_info { u8 vf_compat_ver; }; +struct adf_dc_data { + u8 *ovf_buff; + size_t ovf_buff_sz; + dma_addr_t ovf_buff_p; +}; + struct adf_accel_dev { struct adf_etr_data *transport; struct adf_hw_device_data *hw_device; struct adf_cfg_device_data *cfg; struct adf_fw_loader_data *fw_loader; struct adf_admin_comms *admin; + struct adf_dc_data *dc_data; struct list_head crypto_list; + struct list_head compression_list; unsigned long status; atomic_t ref_count; struct dentry *debugfs_dir; diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h index 655248dbf962d75cf73fdd6d75f86f2ac9bdf276..5d8c3bdb258c18042f551ecfe00330b18c561710 100644 --- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h +++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h @@ -20,6 +20,7 @@ #define ADF_ETRMGR_BANK "Bank" #define ADF_RING_SYM_BANK_NUM "BankSymNumber" #define ADF_RING_ASYM_BANK_NUM "BankAsymNumber" +#define ADF_RING_DC_BANK_NUM "BankDcNumber" #define ADF_CY "Cy" #define ADF_DC "Dc" #define ADF_CFG_DC "dc" diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 7bb477c3ce25ff9c9ca1c151d67f50ea2874509f..7189265573c089eaaa4d9b42d4e99e094e4ecca0 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -110,7 +110,6 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev); void adf_cleanup_etr_data(struct adf_accel_dev *accel_dev); int qat_crypto_register(void); int qat_crypto_unregister(void); -int qat_crypto_dev_config(struct adf_accel_dev *accel_dev); int qat_crypto_vf_dev_config(struct adf_accel_dev *accel_dev); struct qat_crypto_instance *qat_crypto_get_instance_node(int node); void qat_crypto_put_instance(struct qat_crypto_instance *inst); @@ -121,6 +120,14 @@ void qat_algs_unregister(void); int qat_asym_algs_register(void); void qat_asym_algs_unregister(void); +struct qat_compression_instance *qat_compression_get_instance_node(int node); +void qat_compression_put_instance(struct qat_compression_instance *inst); +int qat_compression_register(void); +int qat_compression_unregister(void); +int qat_comp_algs_register(void); +void qat_comp_algs_unregister(void); +void qat_comp_alg_callback(void *resp); + int adf_isr_resource_alloc(struct adf_accel_dev *accel_dev); void adf_isr_resource_free(struct adf_accel_dev *accel_dev); int adf_vf_isr_resource_alloc(struct adf_accel_dev *accel_dev); diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c index 82b69e1f725ba278d76fdd414cb7b67e7d450073..9190532b27eb5bf917e5057b0f20a8a2641b5bf0 100644 --- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c +++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c @@ -438,8 +438,13 @@ static int __init adf_register_ctl_device_driver(void) if (qat_crypto_register()) goto err_crypto_register; + if (qat_compression_register()) + goto err_compression_register; + return 0; +err_compression_register: + qat_crypto_unregister(); err_crypto_register: adf_exit_vf_wq(); err_vf_wq: @@ -463,6 +468,7 @@ static void __exit adf_unregister_ctl_device_driver(void) adf_exit_vf_wq(); adf_exit_pf_wq(); qat_crypto_unregister(); + qat_compression_unregister(); adf_clean_vf_map(false); mutex_destroy(&adf_ctl_lock); } diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.c b/drivers/crypto/qat/qat_common/adf_gen2_config.c new file mode 100644 index 0000000000000000000000000000000000000000..eeb30da7587a504ed85c075efe506983affbc4bd --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_config.c @@ -0,0 +1,206 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_accel_devices.h" +#include "adf_cfg.h" +#include "adf_cfg_strings.h" +#include "adf_gen2_config.h" +#include "adf_common_drv.h" +#include "qat_crypto.h" +#include "qat_compression.h" +#include "adf_transport_access_macros.h" + +static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_crypto(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + for (i = 0; i < instances; i++) { + val = i; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, + i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); + val = 128; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 0; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 2; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 8; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 10; + snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = ADF_COALESCING_DEF_TIME; + snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); + ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, + &val, ADF_DEC); + if (ret) + goto err; + + return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n"); + return ret; +} + +static int adf_gen2_comp_dev_config(struct adf_accel_dev *accel_dev) +{ + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + int banks = GET_MAX_BANKS(accel_dev); + int cpus = num_online_cpus(); + unsigned long val; + int instances; + int ret; + int i; + + if (adf_hw_dev_has_compression(accel_dev)) + instances = min(cpus, banks); + else + instances = 0; + + for (i = 0; i < instances; i++) { + val = i; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 512; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 6; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + + val = 14; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, + key, &val, ADF_DEC); + if (ret) + goto err; + } + + val = i; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + return ret; + + return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n"); + return ret; +} + +/** + * adf_gen2_dev_config() - create dev config required to create instances + * + * @accel_dev: Pointer to acceleration device. + * + * Function creates device configuration required to create instances + * + * Return: 0 on success, error code otherwise. + */ +int adf_gen2_dev_config(struct adf_accel_dev *accel_dev) +{ + int ret; + + ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); + if (ret) + goto err; + + ret = adf_cfg_section_add(accel_dev, "Accelerator0"); + if (ret) + goto err; + + ret = adf_gen2_crypto_dev_config(accel_dev); + if (ret) + goto err; + + ret = adf_gen2_comp_dev_config(accel_dev); + if (ret) + goto err; + + set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); + + return ret; + +err: + dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n"); + return ret; +} +EXPORT_SYMBOL_GPL(adf_gen2_dev_config); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.h b/drivers/crypto/qat/qat_common/adf_gen2_config.h new file mode 100644 index 0000000000000000000000000000000000000000..4bf9da2de68aad1f964a70e8a027e72731da40df --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_config.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef ADF_GEN2_CONFIG_H_ +#define ADF_GEN2_CONFIG_H_ + +#include "adf_accel_devices.h" + +int adf_gen2_dev_config(struct adf_accel_dev *accel_dev); + +#endif diff --git a/drivers/crypto/qat/qat_common/adf_gen2_dc.c b/drivers/crypto/qat/qat_common/adf_gen2_dc.c new file mode 100644 index 0000000000000000000000000000000000000000..47261b1c1da69fbbbd73946780a059d0c5a5095a --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_dc.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_accel_devices.h" +#include "adf_gen2_dc.h" +#include "icp_qat_fw_comp.h" + +static void qat_comp_build_deflate_ctx(void *ctx) +{ + struct icp_qat_fw_comp_req *req_tmpl = (struct icp_qat_fw_comp_req *)ctx; + struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr; + struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars; + struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars; + struct icp_qat_fw_comp_cd_hdr *comp_cd_ctrl = &req_tmpl->comp_cd_ctrl; + + memset(req_tmpl, 0, sizeof(*req_tmpl)); + header->hdr_flags = + ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET); + header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC; + header->comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA, + QAT_COMN_PTR_TYPE_SGL); + header->serv_specif_flags = + ICP_QAT_FW_COMP_FLAGS_BUILD(ICP_QAT_FW_COMP_STATELESS_SESSION, + ICP_QAT_FW_COMP_NOT_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF); + cd_pars->u.sl.comp_slice_cfg_word[0] = + ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_COMPRESS, + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED, + ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE, + ICP_QAT_HW_COMPRESSION_DEPTH_1, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_0); + req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER; + req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC; + req_pars->req_par_flags = + ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(ICP_QAT_FW_COMP_SOP, + ICP_QAT_FW_COMP_EOP, + ICP_QAT_FW_COMP_BFINAL, + ICP_QAT_FW_COMP_CNV, + ICP_QAT_FW_COMP_CNV_RECOVERY, + ICP_QAT_FW_COMP_NO_CNV_DFX, + ICP_QAT_FW_COMP_CRC_MODE_LEGACY, + ICP_QAT_FW_COMP_NO_XXHASH_ACC, + ICP_QAT_FW_COMP_CNV_ERROR_NONE, + ICP_QAT_FW_COMP_NO_APPEND_CRC, + ICP_QAT_FW_COMP_NO_DROP_DATA); + ICP_QAT_FW_COMN_NEXT_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_DRAM_WR); + ICP_QAT_FW_COMN_CURR_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_COMP); + + /* Fill second half of the template for decompression */ + memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl)); + req_tmpl++; + header = &req_tmpl->comn_hdr; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS; + cd_pars = &req_tmpl->cd_pars; + cd_pars->u.sl.comp_slice_cfg_word[0] = + ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS, + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED, + ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE, + ICP_QAT_HW_COMPRESSION_DEPTH_1, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_0); +} + +void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops) +{ + dc_ops->build_deflate_ctx = qat_comp_build_deflate_ctx; +} +EXPORT_SYMBOL_GPL(adf_gen2_init_dc_ops); diff --git a/drivers/crypto/qat/qat_common/adf_gen2_dc.h b/drivers/crypto/qat/qat_common/adf_gen2_dc.h new file mode 100644 index 0000000000000000000000000000000000000000..6eae023354d768d30b913572e9c83eaa13983583 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen2_dc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef ADF_GEN2_DC_H +#define ADF_GEN2_DC_H + +#include "adf_accel_devices.h" + +void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops); + +#endif /* ADF_GEN2_DC_H */ diff --git a/drivers/crypto/qat/qat_common/adf_gen4_dc.c b/drivers/crypto/qat/qat_common/adf_gen4_dc.c new file mode 100644 index 0000000000000000000000000000000000000000..5859238e37de5e8242f3c1475fed75d34aaff1e1 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_dc.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include "adf_accel_devices.h" +#include "icp_qat_fw_comp.h" +#include "icp_qat_hw_20_comp.h" +#include "adf_gen4_dc.h" + +static void qat_comp_build_deflate(void *ctx) +{ + struct icp_qat_fw_comp_req *req_tmpl = + (struct icp_qat_fw_comp_req *)ctx; + struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr; + struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars; + struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars; + struct icp_qat_hw_comp_20_config_csr_upper hw_comp_upper_csr = {0}; + struct icp_qat_hw_comp_20_config_csr_lower hw_comp_lower_csr = {0}; + struct icp_qat_hw_decomp_20_config_csr_lower hw_decomp_lower_csr = {0}; + u32 upper_val; + u32 lower_val; + + memset(req_tmpl, 0, sizeof(*req_tmpl)); + header->hdr_flags = + ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET); + header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC; + header->comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA, + QAT_COMN_PTR_TYPE_SGL); + header->serv_specif_flags = + ICP_QAT_FW_COMP_FLAGS_BUILD(ICP_QAT_FW_COMP_STATELESS_SESSION, + ICP_QAT_FW_COMP_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST, + ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF); + hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL; + hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77; + hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED; + hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1; + hw_comp_lower_csr.hash_update = ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW; + hw_comp_lower_csr.edmm = ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED; + hw_comp_upper_csr.nice = ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL; + hw_comp_upper_csr.lazy = ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL; + + upper_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(hw_comp_upper_csr); + lower_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(hw_comp_lower_csr); + + cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val; + cd_pars->u.sl.comp_slice_cfg_word[1] = upper_val; + + req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER; + req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC; + req_pars->req_par_flags = + ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(ICP_QAT_FW_COMP_SOP, + ICP_QAT_FW_COMP_EOP, + ICP_QAT_FW_COMP_BFINAL, + ICP_QAT_FW_COMP_CNV, + ICP_QAT_FW_COMP_CNV_RECOVERY, + ICP_QAT_FW_COMP_NO_CNV_DFX, + ICP_QAT_FW_COMP_CRC_MODE_LEGACY, + ICP_QAT_FW_COMP_NO_XXHASH_ACC, + ICP_QAT_FW_COMP_CNV_ERROR_NONE, + ICP_QAT_FW_COMP_NO_APPEND_CRC, + ICP_QAT_FW_COMP_NO_DROP_DATA); + + /* Fill second half of the template for decompression */ + memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl)); + req_tmpl++; + header = &req_tmpl->comn_hdr; + header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS; + cd_pars = &req_tmpl->cd_pars; + + hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE; + lower_val = ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(hw_decomp_lower_csr); + + cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val; + cd_pars->u.sl.comp_slice_cfg_word[1] = 0; +} + +void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops) +{ + dc_ops->build_deflate_ctx = qat_comp_build_deflate; +} +EXPORT_SYMBOL_GPL(adf_gen4_init_dc_ops); diff --git a/drivers/crypto/qat/qat_common/adf_gen4_dc.h b/drivers/crypto/qat/qat_common/adf_gen4_dc.h new file mode 100644 index 0000000000000000000000000000000000000000..0b1a6774412eb12cb6636d125495de0b7862baf3 --- /dev/null +++ b/drivers/crypto/qat/qat_common/adf_gen4_dc.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef ADF_GEN4_DC_H +#define ADF_GEN4_DC_H + +#include "adf_accel_devices.h" + +void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops); + +#endif /* ADF_GEN4_DC_H */ diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c index 33a9a46d69494516409c529d8c5d5ab04cf6d9c7..cef7bb8ec007330cde32bf666eefb861519b2330 100644 --- a/drivers/crypto/qat/qat_common/adf_init.c +++ b/drivers/crypto/qat/qat_common/adf_init.c @@ -209,6 +209,14 @@ int adf_dev_start(struct adf_accel_dev *accel_dev) clear_bit(ADF_STATUS_STARTED, &accel_dev->status); return -EFAULT; } + + if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register()) { + dev_err(&GET_DEV(accel_dev), + "Failed to register compression algs\n"); + set_bit(ADF_STATUS_STARTING, &accel_dev->status); + clear_bit(ADF_STATUS_STARTED, &accel_dev->status); + return -EFAULT; + } return 0; } EXPORT_SYMBOL_GPL(adf_dev_start); @@ -242,6 +250,9 @@ void adf_dev_stop(struct adf_accel_dev *accel_dev) qat_asym_algs_unregister(); } + if (!list_empty(&accel_dev->compression_list)) + qat_comp_algs_unregister(); + list_for_each(list_itr, &service_table) { service = list_entry(list_itr, struct service_hndl, list); if (!test_bit(accel_dev->accel_id, service->start_status)) diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index b2db1d70d71fbd8462887a6324226eb884503c72..d85a90cc387b4f6337599c979a0a069a8335b020 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -170,6 +170,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs) if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, (void *)&val, ADF_DEC)) return -EFAULT; + ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC, + &val, ADF_DEC); + if (ret) + return ret; set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw.h b/drivers/crypto/qat/qat_common/icp_qat_fw.h index 6dc09d270082f9136874e03fb032d94d26ad029d..c141160421e19a51fed9c60d71572b60f304844e 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_fw.h +++ b/drivers/crypto/qat/qat_common/icp_qat_fw.h @@ -116,6 +116,10 @@ struct icp_qat_fw_comn_resp { #define ICP_QAT_FW_COMN_VALID_FLAG_BITPOS 7 #define ICP_QAT_FW_COMN_VALID_FLAG_MASK 0x1 #define ICP_QAT_FW_COMN_HDR_RESRVD_FLD_MASK 0x7F +#define ICP_QAT_FW_COMN_CNV_FLAG_BITPOS 6 +#define ICP_QAT_FW_COMN_CNV_FLAG_MASK 0x1 +#define ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS 5 +#define ICP_QAT_FW_COMN_CNVNR_FLAG_MASK 0x1 #define ICP_QAT_FW_COMN_OV_SRV_TYPE_GET(icp_qat_fw_comn_req_hdr_t) \ icp_qat_fw_comn_req_hdr_t.service_type @@ -132,6 +136,26 @@ struct icp_qat_fw_comn_resp { #define ICP_QAT_FW_COMN_HDR_VALID_FLAG_GET(hdr_t) \ ICP_QAT_FW_COMN_VALID_FLAG_GET(hdr_t.hdr_flags) +#define ICP_QAT_FW_COMN_HDR_CNVNR_FLAG_GET(hdr_flags) \ + QAT_FIELD_GET(hdr_flags, \ + ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNVNR_FLAG_MASK) + +#define ICP_QAT_FW_COMN_HDR_CNVNR_FLAG_SET(hdr_t, val) \ + QAT_FIELD_SET((hdr_t.hdr_flags), (val), \ + ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNVNR_FLAG_MASK) + +#define ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(hdr_flags) \ + QAT_FIELD_GET(hdr_flags, \ + ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNV_FLAG_MASK) + +#define ICP_QAT_FW_COMN_HDR_CNV_FLAG_SET(hdr_t, val) \ + QAT_FIELD_SET((hdr_t.hdr_flags), (val), \ + ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \ + ICP_QAT_FW_COMN_CNV_FLAG_MASK) + #define ICP_QAT_FW_COMN_HDR_VALID_FLAG_SET(hdr_t, val) \ ICP_QAT_FW_COMN_VALID_FLAG_SET(hdr_t, val) diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h b/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h new file mode 100644 index 0000000000000000000000000000000000000000..a03d43fef2b370e0583f5a2fee8ff5b707836bcb --- /dev/null +++ b/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h @@ -0,0 +1,404 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _ICP_QAT_FW_COMP_H_ +#define _ICP_QAT_FW_COMP_H_ +#include "icp_qat_fw.h" + +enum icp_qat_fw_comp_cmd_id { + ICP_QAT_FW_COMP_CMD_STATIC = 0, + ICP_QAT_FW_COMP_CMD_DYNAMIC = 1, + ICP_QAT_FW_COMP_CMD_DECOMPRESS = 2, + ICP_QAT_FW_COMP_CMD_DELIMITER +}; + +enum icp_qat_fw_comp_20_cmd_id { + ICP_QAT_FW_COMP_20_CMD_LZ4_COMPRESS = 3, + ICP_QAT_FW_COMP_20_CMD_LZ4_DECOMPRESS = 4, + ICP_QAT_FW_COMP_20_CMD_LZ4S_COMPRESS = 5, + ICP_QAT_FW_COMP_20_CMD_LZ4S_DECOMPRESS = 6, + ICP_QAT_FW_COMP_20_CMD_XP10_COMPRESS = 7, + ICP_QAT_FW_COMP_20_CMD_XP10_DECOMPRESS = 8, + ICP_QAT_FW_COMP_20_CMD_RESERVED_9 = 9, + ICP_QAT_FW_COMP_23_CMD_ZSTD_COMPRESS = 10, + ICP_QAT_FW_COMP_23_CMD_ZSTD_DECOMPRESS = 11, + ICP_QAT_FW_COMP_20_CMD_DELIMITER +}; + +#define ICP_QAT_FW_COMP_STATELESS_SESSION 0 +#define ICP_QAT_FW_COMP_STATEFUL_SESSION 1 +#define ICP_QAT_FW_COMP_NOT_AUTO_SELECT_BEST 0 +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST 1 +#define ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST 0 +#define ICP_QAT_FW_COMP_ENH_AUTO_SELECT_BEST 1 +#define ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST 0 +#define ICP_QAT_FW_COMP_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST 1 +#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_USED_AS_INTMD_BUF 1 +#define ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF 0 +#define ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS 2 +#define ICP_QAT_FW_COMP_SESSION_TYPE_MASK 0x1 +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS 3 +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK 0x1 +#define ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS 4 +#define ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK 0x1 +#define ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS 5 +#define ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK 0x1 +#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS 7 +#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK 0x1 + +#define ICP_QAT_FW_COMP_FLAGS_BUILD(sesstype, autoselect, enhanced_asb, \ + ret_uncomp, secure_ram) \ + ((((sesstype) & ICP_QAT_FW_COMP_SESSION_TYPE_MASK) << \ + ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS) | \ + (((autoselect) & ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK) << \ + ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS) | \ + (((enhanced_asb) & ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK) << \ + ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS) | \ + (((ret_uncomp) & ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK) << \ + ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS) | \ + (((secure_ram) & ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK) << \ + ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS)) + +#define ICP_QAT_FW_COMP_SESSION_TYPE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS, \ + ICP_QAT_FW_COMP_SESSION_TYPE_MASK) + +#define ICP_QAT_FW_COMP_SESSION_TYPE_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS, \ + ICP_QAT_FW_COMP_SESSION_TYPE_MASK) + +#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS, \ + ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK) + +#define ICP_QAT_FW_COMP_EN_ASB_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS, \ + ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK) + +#define ICP_QAT_FW_COMP_RET_UNCOMP_GET(flags) \ + QAT_FIELD_GET(flags, \ + ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS, \ + ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK) + +#define ICP_QAT_FW_COMP_SECURE_RAM_USE_GET(flags) \ + QAT_FIELD_GET(flags, \ + ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS, \ + ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK) + +struct icp_qat_fw_comp_req_hdr_cd_pars { + union { + struct { + __u64 content_desc_addr; + __u16 content_desc_resrvd1; + __u8 content_desc_params_sz; + __u8 content_desc_hdr_resrvd2; + __u32 content_desc_resrvd3; + } s; + struct { + __u32 comp_slice_cfg_word[ICP_QAT_FW_NUM_LONGWORDS_2]; + __u32 content_desc_resrvd4; + } sl; + } u; +}; + +struct icp_qat_fw_comp_req_params { + __u32 comp_len; + __u32 out_buffer_sz; + union { + struct { + __u32 initial_crc32; + __u32 initial_adler; + } legacy; + __u64 crc_data_addr; + } crc; + __u32 req_par_flags; + __u32 rsrvd; +}; + +#define ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(sop, eop, bfinal, cnv, cnvnr, \ + cnvdfx, crc, xxhash_acc, \ + cnv_error_type, append_crc, \ + drop_data) \ + ((((sop) & ICP_QAT_FW_COMP_SOP_MASK) << \ + ICP_QAT_FW_COMP_SOP_BITPOS) | \ + (((eop) & ICP_QAT_FW_COMP_EOP_MASK) << \ + ICP_QAT_FW_COMP_EOP_BITPOS) | \ + (((bfinal) & ICP_QAT_FW_COMP_BFINAL_MASK) \ + << ICP_QAT_FW_COMP_BFINAL_BITPOS) | \ + (((cnv) & ICP_QAT_FW_COMP_CNV_MASK) << \ + ICP_QAT_FW_COMP_CNV_BITPOS) | \ + (((cnvnr) & ICP_QAT_FW_COMP_CNVNR_MASK) \ + << ICP_QAT_FW_COMP_CNVNR_BITPOS) | \ + (((cnvdfx) & ICP_QAT_FW_COMP_CNV_DFX_MASK) \ + << ICP_QAT_FW_COMP_CNV_DFX_BITPOS) | \ + (((crc) & ICP_QAT_FW_COMP_CRC_MODE_MASK) \ + << ICP_QAT_FW_COMP_CRC_MODE_BITPOS) | \ + (((xxhash_acc) & ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK) \ + << ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS) | \ + (((cnv_error_type) & ICP_QAT_FW_COMP_CNV_ERROR_MASK) \ + << ICP_QAT_FW_COMP_CNV_ERROR_BITPOS) | \ + (((append_crc) & ICP_QAT_FW_COMP_APPEND_CRC_MASK) \ + << ICP_QAT_FW_COMP_APPEND_CRC_BITPOS) | \ + (((drop_data) & ICP_QAT_FW_COMP_DROP_DATA_MASK) \ + << ICP_QAT_FW_COMP_DROP_DATA_BITPOS)) + +#define ICP_QAT_FW_COMP_NOT_SOP 0 +#define ICP_QAT_FW_COMP_SOP 1 +#define ICP_QAT_FW_COMP_NOT_EOP 0 +#define ICP_QAT_FW_COMP_EOP 1 +#define ICP_QAT_FW_COMP_NOT_BFINAL 0 +#define ICP_QAT_FW_COMP_BFINAL 1 +#define ICP_QAT_FW_COMP_NO_CNV 0 +#define ICP_QAT_FW_COMP_CNV 1 +#define ICP_QAT_FW_COMP_NO_CNV_RECOVERY 0 +#define ICP_QAT_FW_COMP_CNV_RECOVERY 1 +#define ICP_QAT_FW_COMP_NO_CNV_DFX 0 +#define ICP_QAT_FW_COMP_CNV_DFX 1 +#define ICP_QAT_FW_COMP_CRC_MODE_LEGACY 0 +#define ICP_QAT_FW_COMP_CRC_MODE_E2E 1 +#define ICP_QAT_FW_COMP_NO_XXHASH_ACC 0 +#define ICP_QAT_FW_COMP_XXHASH_ACC 1 +#define ICP_QAT_FW_COMP_APPEND_CRC 1 +#define ICP_QAT_FW_COMP_NO_APPEND_CRC 0 +#define ICP_QAT_FW_COMP_DROP_DATA 1 +#define ICP_QAT_FW_COMP_NO_DROP_DATA 0 +#define ICP_QAT_FW_COMP_SOP_BITPOS 0 +#define ICP_QAT_FW_COMP_SOP_MASK 0x1 +#define ICP_QAT_FW_COMP_EOP_BITPOS 1 +#define ICP_QAT_FW_COMP_EOP_MASK 0x1 +#define ICP_QAT_FW_COMP_BFINAL_BITPOS 6 +#define ICP_QAT_FW_COMP_BFINAL_MASK 0x1 +#define ICP_QAT_FW_COMP_CNV_BITPOS 16 +#define ICP_QAT_FW_COMP_CNV_MASK 0x1 +#define ICP_QAT_FW_COMP_CNVNR_BITPOS 17 +#define ICP_QAT_FW_COMP_CNVNR_MASK 0x1 +#define ICP_QAT_FW_COMP_CNV_DFX_BITPOS 18 +#define ICP_QAT_FW_COMP_CNV_DFX_MASK 0x1 +#define ICP_QAT_FW_COMP_CRC_MODE_BITPOS 19 +#define ICP_QAT_FW_COMP_CRC_MODE_MASK 0x1 +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS 20 +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK 0x1 +#define ICP_QAT_FW_COMP_CNV_ERROR_BITPOS 21 +#define ICP_QAT_FW_COMP_CNV_ERROR_MASK 0b111 +#define ICP_QAT_FW_COMP_CNV_ERROR_NONE 0b000 +#define ICP_QAT_FW_COMP_CNV_ERROR_CHECKSUM 0b001 +#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR_OBC_DIFF 0b010 +#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR 0b011 +#define ICP_QAT_FW_COMP_CNV_ERROR_XLT 0b100 +#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR_IBC_DIFF 0b101 +#define ICP_QAT_FW_COMP_APPEND_CRC_BITPOS 24 +#define ICP_QAT_FW_COMP_APPEND_CRC_MASK 0x1 +#define ICP_QAT_FW_COMP_DROP_DATA_BITPOS 25 +#define ICP_QAT_FW_COMP_DROP_DATA_MASK 0x1 + +#define ICP_QAT_FW_COMP_SOP_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_SOP_BITPOS, \ + ICP_QAT_FW_COMP_SOP_MASK) + +#define ICP_QAT_FW_COMP_SOP_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_SOP_BITPOS, \ + ICP_QAT_FW_COMP_SOP_MASK) + +#define ICP_QAT_FW_COMP_EOP_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_EOP_BITPOS, \ + ICP_QAT_FW_COMP_EOP_MASK) + +#define ICP_QAT_FW_COMP_EOP_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_EOP_BITPOS, \ + ICP_QAT_FW_COMP_EOP_MASK) + +#define ICP_QAT_FW_COMP_BFINAL_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_BFINAL_BITPOS, \ + ICP_QAT_FW_COMP_BFINAL_MASK) + +#define ICP_QAT_FW_COMP_BFINAL_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_BFINAL_BITPOS, \ + ICP_QAT_FW_COMP_BFINAL_MASK) + +#define ICP_QAT_FW_COMP_CNV_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_BITPOS, \ + ICP_QAT_FW_COMP_CNV_MASK) + +#define ICP_QAT_FW_COMP_CNVNR_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNVNR_BITPOS, \ + ICP_QAT_FW_COMP_CNVNR_MASK) + +#define ICP_QAT_FW_COMP_CNV_DFX_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_DFX_BITPOS, \ + ICP_QAT_FW_COMP_CNV_DFX_MASK) + +#define ICP_QAT_FW_COMP_CNV_DFX_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_CNV_DFX_BITPOS, \ + ICP_QAT_FW_COMP_CNV_DFX_MASK) + +#define ICP_QAT_FW_COMP_CRC_MODE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CRC_MODE_BITPOS, \ + ICP_QAT_FW_COMP_CRC_MODE_MASK) + +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS, \ + ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK) + +#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS, \ + ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK) + +#define ICP_QAT_FW_COMP_CNV_ERROR_TYPE_GET(flags) \ + QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_ERROR_BITPOS, \ + ICP_QAT_FW_COMP_CNV_ERROR_MASK) + +#define ICP_QAT_FW_COMP_CNV_ERROR_TYPE_SET(flags, val) \ + QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_CNV_ERROR_BITPOS, \ + ICP_QAT_FW_COMP_CNV_ERROR_MASK) + +struct icp_qat_fw_xlt_req_params { + __u64 inter_buff_ptr; +}; + +struct icp_qat_fw_comp_cd_hdr { + __u16 ram_bank_flags; + __u8 comp_cfg_offset; + __u8 next_curr_id; + __u32 resrvd; + __u64 comp_state_addr; + __u64 ram_banks_addr; +}; + +#define COMP_CPR_INITIAL_CRC 0 +#define COMP_CPR_INITIAL_ADLER 1 + +struct icp_qat_fw_xlt_cd_hdr { + __u16 resrvd1; + __u8 resrvd2; + __u8 next_curr_id; + __u32 resrvd3; +}; + +struct icp_qat_fw_comp_req { + struct icp_qat_fw_comn_req_hdr comn_hdr; + struct icp_qat_fw_comp_req_hdr_cd_pars cd_pars; + struct icp_qat_fw_comn_req_mid comn_mid; + struct icp_qat_fw_comp_req_params comp_pars; + union { + struct icp_qat_fw_xlt_req_params xlt_pars; + __u32 resrvd1[ICP_QAT_FW_NUM_LONGWORDS_2]; + } u1; + __u32 resrvd2[ICP_QAT_FW_NUM_LONGWORDS_2]; + struct icp_qat_fw_comp_cd_hdr comp_cd_ctrl; + union { + struct icp_qat_fw_xlt_cd_hdr xlt_cd_ctrl; + __u32 resrvd3[ICP_QAT_FW_NUM_LONGWORDS_2]; + } u2; +}; + +struct icp_qat_fw_resp_comp_pars { + __u32 input_byte_counter; + __u32 output_byte_counter; + union { + struct { + __u32 curr_crc32; + __u32 curr_adler_32; + } legacy; + __u32 resrvd[ICP_QAT_FW_NUM_LONGWORDS_2]; + } crc; +}; + +struct icp_qat_fw_comp_state { + __u32 rd8_counter; + __u32 status_flags; + __u32 in_counter; + __u32 out_counter; + __u64 intermediate_state; + __u32 lobc; + __u32 replaybc; + __u64 pcrc64_poly; + __u32 crc32; + __u32 adler_xxhash32; + __u64 pcrc64_xorout; + __u32 out_buf_size; + __u32 in_buf_size; + __u64 in_pcrc64; + __u64 out_pcrc64; + __u32 lobs; + __u32 libc; + __u64 reserved; + __u32 xxhash_state[4]; + __u32 cleartext[4]; +}; + +struct icp_qat_fw_comp_resp { + struct icp_qat_fw_comn_resp_hdr comn_resp; + __u64 opaque_data; + struct icp_qat_fw_resp_comp_pars comp_resp_pars; +}; + +#define QAT_FW_COMP_BANK_FLAG_MASK 0x1 +#define QAT_FW_COMP_BANK_I_BITPOS 8 +#define QAT_FW_COMP_BANK_H_BITPOS 7 +#define QAT_FW_COMP_BANK_G_BITPOS 6 +#define QAT_FW_COMP_BANK_F_BITPOS 5 +#define QAT_FW_COMP_BANK_E_BITPOS 4 +#define QAT_FW_COMP_BANK_D_BITPOS 3 +#define QAT_FW_COMP_BANK_C_BITPOS 2 +#define QAT_FW_COMP_BANK_B_BITPOS 1 +#define QAT_FW_COMP_BANK_A_BITPOS 0 + +enum icp_qat_fw_comp_bank_enabled { + ICP_QAT_FW_COMP_BANK_DISABLED = 0, + ICP_QAT_FW_COMP_BANK_ENABLED = 1, + ICP_QAT_FW_COMP_BANK_DELIMITER = 2 +}; + +#define ICP_QAT_FW_COMP_RAM_FLAGS_BUILD(bank_i_enable, bank_h_enable, \ + bank_g_enable, bank_f_enable, \ + bank_e_enable, bank_d_enable, \ + bank_c_enable, bank_b_enable, \ + bank_a_enable) \ + ((((bank_i_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_I_BITPOS) | \ + (((bank_h_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_H_BITPOS) | \ + (((bank_g_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_G_BITPOS) | \ + (((bank_f_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_F_BITPOS) | \ + (((bank_e_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_E_BITPOS) | \ + (((bank_d_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_D_BITPOS) | \ + (((bank_c_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_C_BITPOS) | \ + (((bank_b_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_B_BITPOS) | \ + (((bank_a_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \ + QAT_FW_COMP_BANK_A_BITPOS)) + +struct icp_qat_fw_comp_crc_data_struct { + __u32 crc32; + union { + __u32 adler; + __u32 xxhash; + } adler_xxhash_u; + __u32 cpr_in_crc_lo; + __u32 cpr_in_crc_hi; + __u32 cpr_out_crc_lo; + __u32 cpr_out_crc_hi; + __u32 xlt_in_crc_lo; + __u32 xlt_in_crc_hi; + __u32 xlt_out_crc_lo; + __u32 xlt_out_crc_hi; + __u32 prog_crc_poly_lo; + __u32 prog_crc_poly_hi; + __u32 xor_out_lo; + __u32 xor_out_hi; + __u32 append_crc_lo; + __u32 append_crc_hi; +}; + +struct xxhash_acc_state_buff { + __u32 in_counter; + __u32 out_counter; + __u32 xxhash_state[4]; + __u32 clear_txt[4]; +}; + +#endif diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h index 433304cad2edf22a0a6f8c954e798e949b964f2b..4042739bb6fa9d38412c76d889623706c32ed712 100644 --- a/drivers/crypto/qat/qat_common/icp_qat_hw.h +++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h @@ -307,4 +307,70 @@ struct icp_qat_hw_cipher_algo_blk { struct icp_qat_hw_ucs_cipher_aes256_f8 ucs_aes; }; } __aligned(64); + +enum icp_qat_hw_compression_direction { + ICP_QAT_HW_COMPRESSION_DIR_COMPRESS = 0, + ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS = 1, + ICP_QAT_HW_COMPRESSION_DIR_DELIMITER = 2 +}; + +enum icp_qat_hw_compression_delayed_match { + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED = 0, + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_ENABLED = 1, + ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DELIMITER = 2 +}; + +enum icp_qat_hw_compression_algo { + ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE = 0, + ICP_QAT_HW_COMPRESSION_ALGO_LZS = 1, + ICP_QAT_HW_COMPRESSION_ALGO_DELIMITER = 2 +}; + +enum icp_qat_hw_compression_depth { + ICP_QAT_HW_COMPRESSION_DEPTH_1 = 0, + ICP_QAT_HW_COMPRESSION_DEPTH_4 = 1, + ICP_QAT_HW_COMPRESSION_DEPTH_8 = 2, + ICP_QAT_HW_COMPRESSION_DEPTH_16 = 3, + ICP_QAT_HW_COMPRESSION_DEPTH_128 = 4, + ICP_QAT_HW_COMPRESSION_DEPTH_DELIMITER = 5 +}; + +enum icp_qat_hw_compression_file_type { + ICP_QAT_HW_COMPRESSION_FILE_TYPE_0 = 0, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_1 = 1, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_2 = 2, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_3 = 3, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_4 = 4, + ICP_QAT_HW_COMPRESSION_FILE_TYPE_DELIMITER = 5 +}; + +struct icp_qat_hw_compression_config { + __u32 lower_val; + __u32 upper_val; +}; + +#define QAT_COMPRESSION_DIR_BITPOS 4 +#define QAT_COMPRESSION_DIR_MASK 0x7 +#define QAT_COMPRESSION_DELAYED_MATCH_BITPOS 16 +#define QAT_COMPRESSION_DELAYED_MATCH_MASK 0x1 +#define QAT_COMPRESSION_ALGO_BITPOS 31 +#define QAT_COMPRESSION_ALGO_MASK 0x1 +#define QAT_COMPRESSION_DEPTH_BITPOS 28 +#define QAT_COMPRESSION_DEPTH_MASK 0x7 +#define QAT_COMPRESSION_FILE_TYPE_BITPOS 24 +#define QAT_COMPRESSION_FILE_TYPE_MASK 0xF + +#define ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(dir, delayed, \ + algo, depth, filetype) \ + ((((dir) & QAT_COMPRESSION_DIR_MASK) << \ + QAT_COMPRESSION_DIR_BITPOS) | \ + (((delayed) & QAT_COMPRESSION_DELAYED_MATCH_MASK) << \ + QAT_COMPRESSION_DELAYED_MATCH_BITPOS) | \ + (((algo) & QAT_COMPRESSION_ALGO_MASK) << \ + QAT_COMPRESSION_ALGO_BITPOS) | \ + (((depth) & QAT_COMPRESSION_DEPTH_MASK) << \ + QAT_COMPRESSION_DEPTH_BITPOS) | \ + (((filetype) & QAT_COMPRESSION_FILE_TYPE_MASK) << \ + QAT_COMPRESSION_FILE_TYPE_BITPOS)) + #endif diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h new file mode 100644 index 0000000000000000000000000000000000000000..7ea8962272f2f43ca17583cc03438530c406f4f2 --- /dev/null +++ b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h @@ -0,0 +1,164 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _ICP_QAT_HW_20_COMP_H_ +#define _ICP_QAT_HW_20_COMP_H_ + +#include "icp_qat_hw_20_comp_defs.h" +#include "icp_qat_fw.h" + +struct icp_qat_hw_comp_20_config_csr_lower { + enum icp_qat_hw_comp_20_extended_delay_match_mode edmm; + enum icp_qat_hw_comp_20_hw_comp_format algo; + enum icp_qat_hw_comp_20_search_depth sd; + enum icp_qat_hw_comp_20_hbs_control hbs; + enum icp_qat_hw_comp_20_abd abd; + enum icp_qat_hw_comp_20_lllbd_ctrl lllbd; + enum icp_qat_hw_comp_20_min_match_control mmctrl; + enum icp_qat_hw_comp_20_skip_hash_collision hash_col; + enum icp_qat_hw_comp_20_skip_hash_update hash_update; + enum icp_qat_hw_comp_20_byte_skip skip_ctrl; +}; + +static inline __u32 +ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(struct icp_qat_hw_comp_20_config_csr_lower csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.algo, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_MASK); + QAT_FIELD_SET(val32, csr.sd, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_MASK); + QAT_FIELD_SET(val32, csr.edmm, + ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_MASK); + QAT_FIELD_SET(val32, csr.hbs, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lllbd, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_MASK); + QAT_FIELD_SET(val32, csr.mmctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.hash_col, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_MASK); + QAT_FIELD_SET(val32, csr.hash_update, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_MASK); + QAT_FIELD_SET(val32, csr.skip_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_MASK); + QAT_FIELD_SET(val32, csr.abd, ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_MASK); + + return __builtin_bswap32(val32); +} + +struct icp_qat_hw_comp_20_config_csr_upper { + enum icp_qat_hw_comp_20_scb_control scb_ctrl; + enum icp_qat_hw_comp_20_rmb_control rmb_ctrl; + enum icp_qat_hw_comp_20_som_control som_ctrl; + enum icp_qat_hw_comp_20_skip_hash_rd_control skip_hash_ctrl; + enum icp_qat_hw_comp_20_scb_unload_control scb_unload_ctrl; + enum icp_qat_hw_comp_20_disable_token_fusion_control disable_token_fusion_ctrl; + enum icp_qat_hw_comp_20_lbms lbms; + enum icp_qat_hw_comp_20_scb_mode_reset_mask scb_mode_reset; + __u16 lazy; + __u16 nice; +}; + +static inline __u32 +ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(struct icp_qat_hw_comp_20_config_csr_upper csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.scb_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.rmb_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.som_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.skip_hash_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.scb_unload_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.disable_token_fusion_ctrl, + ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lbms, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_MASK); + QAT_FIELD_SET(val32, csr.scb_mode_reset, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK); + QAT_FIELD_SET(val32, csr.lazy, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_MASK); + QAT_FIELD_SET(val32, csr.nice, + ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_BITPOS, + ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_MASK); + + return __builtin_bswap32(val32); +} + +struct icp_qat_hw_decomp_20_config_csr_lower { + enum icp_qat_hw_decomp_20_hbs_control hbs; + enum icp_qat_hw_decomp_20_lbms lbms; + enum icp_qat_hw_decomp_20_hw_comp_format algo; + enum icp_qat_hw_decomp_20_min_match_control mmctrl; + enum icp_qat_hw_decomp_20_lz4_block_checksum_present lbc; +}; + +static inline __u32 +ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(struct icp_qat_hw_decomp_20_config_csr_lower csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.hbs, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lbms, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_MASK); + QAT_FIELD_SET(val32, csr.algo, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_MASK); + QAT_FIELD_SET(val32, csr.mmctrl, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.lbc, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_MASK); + + return __builtin_bswap32(val32); +} + +struct icp_qat_hw_decomp_20_config_csr_upper { + enum icp_qat_hw_decomp_20_speculative_decoder_control sdc; + enum icp_qat_hw_decomp_20_mini_cam_control mcc; +}; + +static inline __u32 +ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_UPPER(struct icp_qat_hw_decomp_20_config_csr_upper csr) +{ + u32 val32 = 0; + + QAT_FIELD_SET(val32, csr.sdc, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_MASK); + QAT_FIELD_SET(val32, csr.mcc, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_BITPOS, + ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_MASK); + + return __builtin_bswap32(val32); +} + +#endif diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h new file mode 100644 index 0000000000000000000000000000000000000000..208d4554283bfc8cffa50657a3f1f551f5db6007 --- /dev/null +++ b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h @@ -0,0 +1,300 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _ICP_QAT_HW_20_COMP_DEFS_H +#define _ICP_QAT_HW_20_COMP_DEFS_H + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_BITPOS 31 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_scb_control { + ICP_QAT_HW_COMP_20_SCB_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_COMP_20_SCB_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SCB_CONTROL_DISABLE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_BITPOS 30 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_rmb_control { + ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_ALL = 0x0, + ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_FC_ONLY = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_ALL + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_BITPOS 28 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_MASK 0x3 + +enum icp_qat_hw_comp_20_som_control { + ICP_QAT_HW_COMP_20_SOM_CONTROL_NORMAL_MODE = 0x0, + ICP_QAT_HW_COMP_20_SOM_CONTROL_REPLAY_MODE = 0x1, + ICP_QAT_HW_COMP_20_SOM_CONTROL_INPUT_CRC = 0x2, + ICP_QAT_HW_COMP_20_SOM_CONTROL_RESERVED_MODE = 0x3, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SOM_CONTROL_NORMAL_MODE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_BITPOS 27 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_skip_hash_rd_control { + ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_NO_SKIP = 0x0, + ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_SKIP_HASH_READS = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_NO_SKIP + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_BITPOS 26 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_scb_unload_control { + ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_UNLOAD = 0x0, + ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_NO_UNLOAD = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_UNLOAD + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_BITPOS 21 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_disable_token_fusion_control { + ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_ENABLE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_BITPOS 19 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_MASK 0x3 + +enum icp_qat_hw_comp_20_lbms { + ICP_QAT_HW_COMP_20_LBMS_LBMS_64KB = 0x0, + ICP_QAT_HW_COMP_20_LBMS_LBMS_256KB = 0x1, + ICP_QAT_HW_COMP_20_LBMS_LBMS_1MB = 0x2, + ICP_QAT_HW_COMP_20_LBMS_LBMS_4MB = 0x3, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_LBMS_LBMS_64KB + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS 18 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK 0x1 + +enum icp_qat_hw_comp_20_scb_mode_reset_mask { + ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS = 0x0, + ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS_AND_HISTORY = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_BITPOS 9 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_MASK 0x1ff +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL 258 + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_BITPOS 0 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_MASK 0x1ff +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL 259 + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS 14 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_MASK 0x7 + +enum icp_qat_hw_comp_20_hbs_control { + ICP_QAT_HW_COMP_20_HBS_CONTROL_HBS_IS_32KB = 0x0, + ICP_QAT_HW_COMP_23_HBS_CONTROL_HBS_IS_64KB = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_HBS_CONTROL_HBS_IS_32KB + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_BITPOS 13 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_MASK 0x1 + +enum icp_qat_hw_comp_20_abd { + ICP_QAT_HW_COMP_20_ABD_ABD_ENABLED = 0x0, + ICP_QAT_HW_COMP_20_ABD_ABD_DISABLED = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_ABD_ABD_ENABLED + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_BITPOS 12 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_MASK 0x1 + +enum icp_qat_hw_comp_20_lllbd_ctrl { + ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED = 0x0, + ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_DISABLED = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_BITPOS 8 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_MASK 0xf + +enum icp_qat_hw_comp_20_search_depth { + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1 = 0x1, + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_6 = 0x3, + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_9 = 0x4, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1 + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_BITPOS 5 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_MASK 0x7 + +enum icp_qat_hw_comp_20_hw_comp_format { + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77 = 0x0, + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_DEFLATE = 0x1, + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4 = 0x2, + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4S = 0x3, + ICP_QAT_HW_COMP_23_HW_COMP_FORMAT_ZSTD = 0x4, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_DEFLATE + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS 4 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK 0x1 + +enum icp_qat_hw_comp_20_min_match_control { + ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_3B = 0x0, + ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_4B = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_3B + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_BITPOS 3 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_MASK 0x1 + +enum icp_qat_hw_comp_20_skip_hash_collision { + ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_ALLOW = 0x0, + ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_DONT_ALLOW = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_ALLOW + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_BITPOS 2 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_MASK 0x1 + +enum icp_qat_hw_comp_20_skip_hash_update { + ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_ALLOW = 0x0, + ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_ALLOW + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_BITPOS 1 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_MASK 0x1 + +enum icp_qat_hw_comp_20_byte_skip { + ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_TOKEN = 0x0, + ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_TOKEN + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_BITPOS 0 +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_MASK 0x1 + +enum icp_qat_hw_comp_20_extended_delay_match_mode { + ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_DISABLED = 0x0, + ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED = 0x1, +}; + +#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_DEFAULT_VAL \ + ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_DISABLED + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_BITPOS 31 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_MASK 0x1 + +enum icp_qat_hw_decomp_20_speculative_decoder_control { + ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_ENABLE + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_BITPOS 30 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_MASK 0x1 + +enum icp_qat_hw_decomp_20_mini_cam_control { + ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_ENABLE = 0x0, + ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_DISABLE = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_ENABLE + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS 14 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_MASK 0x7 + +enum icp_qat_hw_decomp_20_hbs_control { + ICP_QAT_HW_DECOMP_20_HBS_CONTROL_HBS_IS_32KB = 0x0, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_HBS_CONTROL_HBS_IS_32KB + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_BITPOS 8 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_MASK 0x3 + +enum icp_qat_hw_decomp_20_lbms { + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_64KB = 0x0, + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_256KB = 0x1, + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_1MB = 0x2, + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_4MB = 0x3, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_LBMS_LBMS_64KB + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_BITPOS 5 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_MASK 0x7 + +enum icp_qat_hw_decomp_20_hw_comp_format { + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE = 0x1, + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4 = 0x2, + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4S = 0x3, + ICP_QAT_HW_DECOMP_23_HW_DECOMP_FORMAT_ZSTD = 0x4, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS 4 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK 0x1 + +enum icp_qat_hw_decomp_20_min_match_control { + ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_3B = 0x0, + ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_4B = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_3B + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_BITPOS 3 +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_MASK 0x1 + +enum icp_qat_hw_decomp_20_lz4_block_checksum_present { + ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_ABSENT = 0x0, + ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_PRESENT = 0x1, +}; + +#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_DEFAULT_VAL \ + ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_ABSENT + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index cad9c58caab13504ab29e66fa2e0bd5eaefc94f9..b4b9f0aa59b9860124af6d415befbcf0f9f8bb9b 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -23,6 +23,7 @@ #include "icp_qat_hw.h" #include "icp_qat_fw.h" #include "icp_qat_fw_la.h" +#include "qat_bl.h" #define QAT_AES_HW_CONFIG_ENC(alg, mode) \ ICP_QAT_HW_CIPHER_CONFIG_BUILD(mode, alg, \ @@ -663,189 +664,6 @@ static int qat_alg_aead_setkey(struct crypto_aead *tfm, const u8 *key, return qat_alg_aead_newkey(tfm, key, keylen); } -static void qat_alg_free_bufl(struct qat_crypto_instance *inst, - struct qat_crypto_request *qat_req) -{ - struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_alg_buf_list *bl = qat_req->buf.bl; - struct qat_alg_buf_list *blout = qat_req->buf.blout; - dma_addr_t blp = qat_req->buf.blp; - dma_addr_t blpout = qat_req->buf.bloutp; - size_t sz = qat_req->buf.sz; - size_t sz_out = qat_req->buf.sz_out; - int bl_dma_dir; - int i; - - bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; - - for (i = 0; i < bl->num_bufs; i++) - dma_unmap_single(dev, bl->bufers[i].addr, - bl->bufers[i].len, bl_dma_dir); - - dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); - - if (!qat_req->buf.sgl_src_valid) - kfree(bl); - - if (blp != blpout) { - /* If out of place operation dma unmap only data */ - int bufless = blout->num_bufs - blout->num_mapped_bufs; - - for (i = bufless; i < blout->num_bufs; i++) { - dma_unmap_single(dev, blout->bufers[i].addr, - blout->bufers[i].len, - DMA_FROM_DEVICE); - } - dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); - - if (!qat_req->buf.sgl_dst_valid) - kfree(blout); - } -} - -static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst, - struct scatterlist *sgl, - struct scatterlist *sglout, - struct qat_crypto_request *qat_req, - gfp_t flags) -{ - struct device *dev = &GET_DEV(inst->accel_dev); - int i, sg_nctr = 0; - int n = sg_nents(sgl); - struct qat_alg_buf_list *bufl; - struct qat_alg_buf_list *buflout = NULL; - dma_addr_t blp = DMA_MAPPING_ERROR; - dma_addr_t bloutp = DMA_MAPPING_ERROR; - struct scatterlist *sg; - size_t sz_out, sz = struct_size(bufl, bufers, n); - int node = dev_to_node(&GET_DEV(inst->accel_dev)); - int bufl_dma_dir; - - if (unlikely(!n)) - return -EINVAL; - - qat_req->buf.sgl_src_valid = false; - qat_req->buf.sgl_dst_valid = false; - - if (n > QAT_MAX_BUFF_DESC) { - bufl = kzalloc_node(sz, flags, node); - if (unlikely(!bufl)) - return -ENOMEM; - } else { - bufl = &qat_req->buf.sgl_src.sgl_hdr; - memset(bufl, 0, sizeof(struct qat_alg_buf_list)); - qat_req->buf.sgl_src_valid = true; - } - - bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; - - for_each_sg(sgl, sg, n, i) - bufl->bufers[i].addr = DMA_MAPPING_ERROR; - - for_each_sg(sgl, sg, n, i) { - int y = sg_nctr; - - if (!sg->length) - continue; - - bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), - sg->length, - bufl_dma_dir); - bufl->bufers[y].len = sg->length; - if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) - goto err_in; - sg_nctr++; - } - bufl->num_bufs = sg_nctr; - blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, blp))) - goto err_in; - qat_req->buf.bl = bufl; - qat_req->buf.blp = blp; - qat_req->buf.sz = sz; - /* Handle out of place operation */ - if (sgl != sglout) { - struct qat_alg_buf *bufers; - - n = sg_nents(sglout); - sz_out = struct_size(buflout, bufers, n); - sg_nctr = 0; - - if (n > QAT_MAX_BUFF_DESC) { - buflout = kzalloc_node(sz_out, flags, node); - if (unlikely(!buflout)) - goto err_in; - } else { - buflout = &qat_req->buf.sgl_dst.sgl_hdr; - memset(buflout, 0, sizeof(struct qat_alg_buf_list)); - qat_req->buf.sgl_dst_valid = true; - } - - bufers = buflout->bufers; - for_each_sg(sglout, sg, n, i) - bufers[i].addr = DMA_MAPPING_ERROR; - - for_each_sg(sglout, sg, n, i) { - int y = sg_nctr; - - if (!sg->length) - continue; - - bufers[y].addr = dma_map_single(dev, sg_virt(sg), - sg->length, - DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, bufers[y].addr))) - goto err_out; - bufers[y].len = sg->length; - sg_nctr++; - } - buflout->num_bufs = sg_nctr; - buflout->num_mapped_bufs = sg_nctr; - bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, bloutp))) - goto err_out; - qat_req->buf.blout = buflout; - qat_req->buf.bloutp = bloutp; - qat_req->buf.sz_out = sz_out; - } else { - /* Otherwise set the src and dst to the same address */ - qat_req->buf.bloutp = qat_req->buf.blp; - qat_req->buf.sz_out = 0; - } - return 0; - -err_out: - if (!dma_mapping_error(dev, bloutp)) - dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE); - - n = sg_nents(sglout); - for (i = 0; i < n; i++) - if (!dma_mapping_error(dev, buflout->bufers[i].addr)) - dma_unmap_single(dev, buflout->bufers[i].addr, - buflout->bufers[i].len, - DMA_FROM_DEVICE); - - if (!qat_req->buf.sgl_dst_valid) - kfree(buflout); - -err_in: - if (!dma_mapping_error(dev, blp)) - dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); - - n = sg_nents(sgl); - for (i = 0; i < n; i++) - if (!dma_mapping_error(dev, bufl->bufers[i].addr)) - dma_unmap_single(dev, bufl->bufers[i].addr, - bufl->bufers[i].len, - bufl_dma_dir); - - if (!qat_req->buf.sgl_src_valid) - kfree(bufl); - - dev_err(dev, "Failed to map buf for dma\n"); - return -ENOMEM; -} - static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp, struct qat_crypto_request *qat_req) { @@ -855,7 +673,7 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp, u8 stat_filed = qat_resp->comn_resp.comn_status; int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed); - qat_alg_free_bufl(inst, qat_req); + qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EBADMSG; areq->base.complete(&areq->base, res); @@ -925,7 +743,7 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp, u8 stat_filed = qat_resp->comn_resp.comn_status; int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed); - qat_alg_free_bufl(inst, qat_req); + qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) res = -EINVAL; @@ -981,7 +799,8 @@ static int qat_alg_aead_dec(struct aead_request *areq) if (cipher_len % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; @@ -1003,7 +822,7 @@ static int qat_alg_aead_dec(struct aead_request *areq) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } @@ -1024,7 +843,8 @@ static int qat_alg_aead_enc(struct aead_request *areq) if (areq->cryptlen % AES_BLOCK_SIZE != 0) return -EINVAL; - ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; @@ -1048,7 +868,7 @@ static int qat_alg_aead_enc(struct aead_request *areq) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } @@ -1209,7 +1029,8 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) if (req->cryptlen == 0) return 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst, + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; @@ -1230,7 +1051,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } @@ -1275,7 +1096,8 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) if (req->cryptlen == 0) return 0; - ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f); + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst, + &qat_req->buf, NULL, f); if (unlikely(ret)) return ret; @@ -1297,7 +1119,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req) ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base); if (ret == -ENOSPC) - qat_alg_free_bufl(ctx->inst, qat_req); + qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf); return ret; } diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h index 5ce9f4f69d8ff8339e9b64c215a19d57da4a13cd..0baca16e1eff001df731e5fb0583e79d34c695d5 100644 --- a/drivers/crypto/qat/qat_common/qat_algs_send.h +++ b/drivers/crypto/qat/qat_common/qat_algs_send.h @@ -3,7 +3,21 @@ #ifndef QAT_ALGS_SEND_H #define QAT_ALGS_SEND_H -#include "qat_crypto.h" +#include +#include "adf_transport_internal.h" + +struct qat_instance_backlog { + struct list_head list; + spinlock_t lock; /* protects backlog list */ +}; + +struct qat_alg_req { + u32 *fw_req; + struct adf_etr_ring_data *tx_ring; + struct crypto_async_request *base; + struct list_head list; + struct qat_instance_backlog *backlog; +}; int qat_alg_send_message(struct qat_alg_req *req); void qat_alg_send_backlog(struct qat_instance_backlog *backlog); diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 94a26702aeae113762adb19dbe2e7909af4d393e..935a7e012946e30b3b703e7d427449909b53ec1e 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -494,6 +494,8 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm) if (!inst) return -EINVAL; + kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64); + ctx->p_size = 0; ctx->g2 = false; ctx->inst = inst; @@ -1230,6 +1232,8 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm) if (!inst) return -EINVAL; + akcipher_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64); + ctx->key_sz = 0; ctx->inst = inst; return 0; @@ -1252,7 +1256,6 @@ static struct akcipher_alg rsa = { .max_size = qat_rsa_max_size, .init = qat_rsa_init_tfm, .exit = qat_rsa_exit_tfm, - .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "rsa", .cra_driver_name = "qat-rsa", @@ -1269,7 +1272,6 @@ static struct kpp_alg dh = { .max_size = qat_dh_max_size, .init = qat_dh_init_tfm, .exit = qat_dh_exit_tfm, - .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "dh", .cra_driver_name = "qat-dh", diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c new file mode 100644 index 0000000000000000000000000000000000000000..2e89ff08041bfaf135e8f04bddfbd5e84d22f5bf --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_bl.c @@ -0,0 +1,383 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2014 - 2022 Intel Corporation */ +#include +#include +#include +#include +#include +#include +#include "adf_accel_devices.h" +#include "qat_bl.h" +#include "qat_crypto.h" + +void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, + struct qat_request_buffs *buf) +{ + struct device *dev = &GET_DEV(accel_dev); + struct qat_alg_buf_list *bl = buf->bl; + struct qat_alg_buf_list *blout = buf->blout; + dma_addr_t blp = buf->blp; + dma_addr_t blpout = buf->bloutp; + size_t sz = buf->sz; + size_t sz_out = buf->sz_out; + int bl_dma_dir; + int i; + + bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + + for (i = 0; i < bl->num_bufs; i++) + dma_unmap_single(dev, bl->bufers[i].addr, + bl->bufers[i].len, bl_dma_dir); + + dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); + + if (!buf->sgl_src_valid) + kfree(bl); + + if (blp != blpout) { + for (i = 0; i < blout->num_mapped_bufs; i++) { + dma_unmap_single(dev, blout->bufers[i].addr, + blout->bufers[i].len, + DMA_FROM_DEVICE); + } + dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE); + + if (!buf->sgl_dst_valid) + kfree(blout); + } +} + +static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_request_buffs *buf, + dma_addr_t extra_dst_buff, + size_t sz_extra_dst_buff, + gfp_t flags) +{ + struct device *dev = &GET_DEV(accel_dev); + int i, sg_nctr = 0; + int n = sg_nents(sgl); + struct qat_alg_buf_list *bufl; + struct qat_alg_buf_list *buflout = NULL; + dma_addr_t blp = DMA_MAPPING_ERROR; + dma_addr_t bloutp = DMA_MAPPING_ERROR; + struct scatterlist *sg; + size_t sz_out, sz = struct_size(bufl, bufers, n); + int node = dev_to_node(&GET_DEV(accel_dev)); + int bufl_dma_dir; + + if (unlikely(!n)) + return -EINVAL; + + buf->sgl_src_valid = false; + buf->sgl_dst_valid = false; + + if (n > QAT_MAX_BUFF_DESC) { + bufl = kzalloc_node(sz, flags, node); + if (unlikely(!bufl)) + return -ENOMEM; + } else { + bufl = &buf->sgl_src.sgl_hdr; + memset(bufl, 0, sizeof(struct qat_alg_buf_list)); + buf->sgl_src_valid = true; + } + + bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL; + + for (i = 0; i < n; i++) + bufl->bufers[i].addr = DMA_MAPPING_ERROR; + + for_each_sg(sgl, sg, n, i) { + int y = sg_nctr; + + if (!sg->length) + continue; + + bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), + sg->length, + bufl_dma_dir); + bufl->bufers[y].len = sg->length; + if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) + goto err_in; + sg_nctr++; + } + bufl->num_bufs = sg_nctr; + blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, blp))) + goto err_in; + buf->bl = bufl; + buf->blp = blp; + buf->sz = sz; + /* Handle out of place operation */ + if (sgl != sglout) { + struct qat_alg_buf *bufers; + int extra_buff = extra_dst_buff ? 1 : 0; + int n_sglout = sg_nents(sglout); + + n = n_sglout + extra_buff; + sz_out = struct_size(buflout, bufers, n); + sg_nctr = 0; + + if (n > QAT_MAX_BUFF_DESC) { + buflout = kzalloc_node(sz_out, flags, node); + if (unlikely(!buflout)) + goto err_in; + } else { + buflout = &buf->sgl_dst.sgl_hdr; + memset(buflout, 0, sizeof(struct qat_alg_buf_list)); + buf->sgl_dst_valid = true; + } + + bufers = buflout->bufers; + for (i = 0; i < n; i++) + bufers[i].addr = DMA_MAPPING_ERROR; + + for_each_sg(sglout, sg, n_sglout, i) { + int y = sg_nctr; + + if (!sg->length) + continue; + + bufers[y].addr = dma_map_single(dev, sg_virt(sg), + sg->length, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, bufers[y].addr))) + goto err_out; + bufers[y].len = sg->length; + sg_nctr++; + } + if (extra_buff) { + bufers[sg_nctr].addr = extra_dst_buff; + bufers[sg_nctr].len = sz_extra_dst_buff; + } + + buflout->num_bufs = sg_nctr; + buflout->num_bufs += extra_buff; + buflout->num_mapped_bufs = sg_nctr; + bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, bloutp))) + goto err_out; + buf->blout = buflout; + buf->bloutp = bloutp; + buf->sz_out = sz_out; + } else { + /* Otherwise set the src and dst to the same address */ + buf->bloutp = buf->blp; + buf->sz_out = 0; + } + return 0; + +err_out: + if (!dma_mapping_error(dev, bloutp)) + dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE); + + n = sg_nents(sglout); + for (i = 0; i < n; i++) { + if (buflout->bufers[i].addr == extra_dst_buff) + break; + if (!dma_mapping_error(dev, buflout->bufers[i].addr)) + dma_unmap_single(dev, buflout->bufers[i].addr, + buflout->bufers[i].len, + DMA_FROM_DEVICE); + } + + if (!buf->sgl_dst_valid) + kfree(buflout); + +err_in: + if (!dma_mapping_error(dev, blp)) + dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE); + + n = sg_nents(sgl); + for (i = 0; i < n; i++) + if (!dma_mapping_error(dev, bufl->bufers[i].addr)) + dma_unmap_single(dev, bufl->bufers[i].addr, + bufl->bufers[i].len, + bufl_dma_dir); + + if (!buf->sgl_src_valid) + kfree(bufl); + + dev_err(dev, "Failed to map buf for dma\n"); + return -ENOMEM; +} + +int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_request_buffs *buf, + struct qat_sgl_to_bufl_params *params, + gfp_t flags) +{ + dma_addr_t extra_dst_buff = 0; + size_t sz_extra_dst_buff = 0; + + if (params) { + extra_dst_buff = params->extra_dst_buff; + sz_extra_dst_buff = params->sz_extra_dst_buff; + } + + return __qat_bl_sgl_to_bufl(accel_dev, sgl, sglout, buf, + extra_dst_buff, sz_extra_dst_buff, + flags); +} + +static void qat_bl_sgl_unmap(struct adf_accel_dev *accel_dev, + struct qat_alg_buf_list *bl) +{ + struct device *dev = &GET_DEV(accel_dev); + int n = bl->num_bufs; + int i; + + for (i = 0; i < n; i++) + if (!dma_mapping_error(dev, bl->bufers[i].addr)) + dma_unmap_single(dev, bl->bufers[i].addr, + bl->bufers[i].len, DMA_FROM_DEVICE); +} + +static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct qat_alg_buf_list **bl) +{ + struct device *dev = &GET_DEV(accel_dev); + struct qat_alg_buf_list *bufl; + int node = dev_to_node(dev); + struct scatterlist *sg; + int n, i, sg_nctr; + size_t sz; + + n = sg_nents(sgl); + sz = struct_size(bufl, bufers, n); + bufl = kzalloc_node(sz, GFP_KERNEL, node); + if (unlikely(!bufl)) + return -ENOMEM; + + for (i = 0; i < n; i++) + bufl->bufers[i].addr = DMA_MAPPING_ERROR; + + sg_nctr = 0; + for_each_sg(sgl, sg, n, i) { + int y = sg_nctr; + + if (!sg->length) + continue; + + bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg), + sg->length, + DMA_FROM_DEVICE); + bufl->bufers[y].len = sg->length; + if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr))) + goto err_map; + sg_nctr++; + } + bufl->num_bufs = sg_nctr; + bufl->num_mapped_bufs = sg_nctr; + + *bl = bufl; + + return 0; + +err_map: + for (i = 0; i < n; i++) + if (!dma_mapping_error(dev, bufl->bufers[i].addr)) + dma_unmap_single(dev, bufl->bufers[i].addr, + bufl->bufers[i].len, + DMA_FROM_DEVICE); + kfree(bufl); + *bl = NULL; + + return -ENOMEM; +} + +static void qat_bl_sgl_free_unmap(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct qat_alg_buf_list *bl, + bool free_bl) +{ + if (bl) { + qat_bl_sgl_unmap(accel_dev, bl); + + if (free_bl) + kfree(bl); + } + if (sgl) + sgl_free(sgl); +} + +static int qat_bl_sgl_alloc_map(struct adf_accel_dev *accel_dev, + struct scatterlist **sgl, + struct qat_alg_buf_list **bl, + unsigned int dlen, + gfp_t gfp) +{ + struct scatterlist *dst; + int ret; + + dst = sgl_alloc(dlen, gfp, NULL); + if (!dst) { + dev_err(&GET_DEV(accel_dev), "sg_alloc failed\n"); + return -ENOMEM; + } + + ret = qat_bl_sgl_map(accel_dev, dst, bl); + if (ret) + goto err; + + *sgl = dst; + + return 0; + +err: + sgl_free(dst); + *sgl = NULL; + return ret; +} + +int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev, + struct scatterlist **sg, + unsigned int dlen, + struct qat_request_buffs *qat_bufs, + gfp_t gfp) +{ + struct device *dev = &GET_DEV(accel_dev); + dma_addr_t new_blp = DMA_MAPPING_ERROR; + struct qat_alg_buf_list *new_bl; + struct scatterlist *new_sg; + size_t new_bl_size; + int ret; + + ret = qat_bl_sgl_alloc_map(accel_dev, &new_sg, &new_bl, dlen, gfp); + if (ret) + return ret; + + new_bl_size = struct_size(new_bl, bufers, new_bl->num_bufs); + + /* Map new firmware SGL descriptor */ + new_blp = dma_map_single(dev, new_bl, new_bl_size, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, new_blp))) + goto err; + + /* Unmap old firmware SGL descriptor */ + dma_unmap_single(dev, qat_bufs->bloutp, qat_bufs->sz_out, DMA_TO_DEVICE); + + /* Free and unmap old scatterlist */ + qat_bl_sgl_free_unmap(accel_dev, *sg, qat_bufs->blout, + !qat_bufs->sgl_dst_valid); + + qat_bufs->sgl_dst_valid = false; + qat_bufs->blout = new_bl; + qat_bufs->bloutp = new_blp; + qat_bufs->sz_out = new_bl_size; + + *sg = new_sg; + + return 0; +err: + qat_bl_sgl_free_unmap(accel_dev, new_sg, new_bl, true); + + if (!dma_mapping_error(dev, new_blp)) + dma_unmap_single(dev, new_blp, new_bl_size, DMA_TO_DEVICE); + + return -ENOMEM; +} diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h new file mode 100644 index 0000000000000000000000000000000000000000..8ca5e52ee9e21698d54e190be2bc8070e915bac4 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_bl.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2014 - 2022 Intel Corporation */ +#ifndef QAT_BL_H +#define QAT_BL_H +#include +#include +#include + +#define QAT_MAX_BUFF_DESC 4 + +struct qat_alg_buf { + u32 len; + u32 resrvd; + u64 addr; +} __packed; + +struct qat_alg_buf_list { + u64 resrvd; + u32 num_bufs; + u32 num_mapped_bufs; + struct qat_alg_buf bufers[]; +} __packed; + +struct qat_alg_fixed_buf_list { + struct qat_alg_buf_list sgl_hdr; + struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; +} __packed __aligned(64); + +struct qat_request_buffs { + struct qat_alg_buf_list *bl; + dma_addr_t blp; + struct qat_alg_buf_list *blout; + dma_addr_t bloutp; + size_t sz; + size_t sz_out; + bool sgl_src_valid; + bool sgl_dst_valid; + struct qat_alg_fixed_buf_list sgl_src; + struct qat_alg_fixed_buf_list sgl_dst; +}; + +struct qat_sgl_to_bufl_params { + dma_addr_t extra_dst_buff; + size_t sz_extra_dst_buff; +}; + +void qat_bl_free_bufl(struct adf_accel_dev *accel_dev, + struct qat_request_buffs *buf); +int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev, + struct scatterlist *sgl, + struct scatterlist *sglout, + struct qat_request_buffs *buf, + struct qat_sgl_to_bufl_params *params, + gfp_t flags); + +static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req) +{ + return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; +} + +int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev, + struct scatterlist **newd, + unsigned int dlen, + struct qat_request_buffs *qat_bufs, + gfp_t gfp); + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_comp_algs.c b/drivers/crypto/qat/qat_common/qat_comp_algs.c new file mode 100644 index 0000000000000000000000000000000000000000..1480d36a8d2bb152b134ebe5f72fc6fdb537bbf9 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_comp_algs.c @@ -0,0 +1,344 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include +#include +#include +#include +#include +#include +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "qat_bl.h" +#include "qat_comp_req.h" +#include "qat_compression.h" +#include "qat_algs_send.h" + +static DEFINE_MUTEX(algs_lock); +static unsigned int active_devs; + +enum direction { + DECOMPRESSION = 0, + COMPRESSION = 1, +}; + +struct qat_compression_ctx { + u8 comp_ctx[QAT_COMP_CTX_SIZE]; + struct qat_compression_instance *inst; +}; + +struct qat_dst { + bool is_null; + int resubmitted; +}; + +struct qat_compression_req { + u8 req[QAT_COMP_REQ_SIZE]; + struct qat_compression_ctx *qat_compression_ctx; + struct acomp_req *acompress_req; + struct qat_request_buffs buf; + enum direction dir; + int actual_dlen; + struct qat_alg_req alg_req; + struct work_struct resubmit; + struct qat_dst dst; +}; + +static int qat_alg_send_dc_message(struct qat_compression_req *qat_req, + struct qat_compression_instance *inst, + struct crypto_async_request *base) +{ + struct qat_alg_req *alg_req = &qat_req->alg_req; + + alg_req->fw_req = (u32 *)&qat_req->req; + alg_req->tx_ring = inst->dc_tx; + alg_req->base = base; + alg_req->backlog = &inst->backlog; + + return qat_alg_send_message(alg_req); +} + +static void qat_comp_resubmit(struct work_struct *work) +{ + struct qat_compression_req *qat_req = + container_of(work, struct qat_compression_req, resubmit); + struct qat_compression_ctx *ctx = qat_req->qat_compression_ctx; + struct adf_accel_dev *accel_dev = ctx->inst->accel_dev; + struct qat_request_buffs *qat_bufs = &qat_req->buf; + struct qat_compression_instance *inst = ctx->inst; + struct acomp_req *areq = qat_req->acompress_req; + struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq); + unsigned int dlen = CRYPTO_ACOMP_DST_MAX; + u8 *req = qat_req->req; + dma_addr_t dfbuf; + int ret; + + areq->dlen = dlen; + + dev_dbg(&GET_DEV(accel_dev), "[%s][%s] retry NULL dst request - dlen = %d\n", + crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)), + qat_req->dir == COMPRESSION ? "comp" : "decomp", dlen); + + ret = qat_bl_realloc_map_new_dst(accel_dev, &areq->dst, dlen, qat_bufs, + qat_algs_alloc_flags(&areq->base)); + if (ret) + goto err; + + qat_req->dst.resubmitted = true; + + dfbuf = qat_req->buf.bloutp; + qat_comp_override_dst(req, dfbuf, dlen); + + ret = qat_alg_send_dc_message(qat_req, inst, &areq->base); + if (ret != -ENOSPC) + return; + +err: + qat_bl_free_bufl(accel_dev, qat_bufs); + areq->base.complete(&areq->base, ret); +} + +static void qat_comp_generic_callback(struct qat_compression_req *qat_req, + void *resp) +{ + struct acomp_req *areq = qat_req->acompress_req; + struct qat_compression_ctx *ctx = qat_req->qat_compression_ctx; + struct adf_accel_dev *accel_dev = ctx->inst->accel_dev; + struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq); + struct qat_compression_instance *inst = ctx->inst; + int consumed, produced; + s8 cmp_err, xlt_err; + int res = -EBADMSG; + int status; + u8 cnv; + + status = qat_comp_get_cmp_status(resp); + status |= qat_comp_get_xlt_status(resp); + cmp_err = qat_comp_get_cmp_err(resp); + xlt_err = qat_comp_get_xlt_err(resp); + + consumed = qat_comp_get_consumed_ctr(resp); + produced = qat_comp_get_produced_ctr(resp); + + dev_dbg(&GET_DEV(accel_dev), + "[%s][%s][%s] slen = %8d dlen = %8d consumed = %8d produced = %8d cmp_err = %3d xlt_err = %3d", + crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)), + qat_req->dir == COMPRESSION ? "comp " : "decomp", + status ? "ERR" : "OK ", + areq->slen, areq->dlen, consumed, produced, cmp_err, xlt_err); + + areq->dlen = 0; + + if (qat_req->dir == DECOMPRESSION && qat_req->dst.is_null) { + if (cmp_err == ERR_CODE_OVERFLOW_ERROR) { + if (qat_req->dst.resubmitted) { + dev_dbg(&GET_DEV(accel_dev), + "Output does not fit destination buffer\n"); + res = -EOVERFLOW; + goto end; + } + + INIT_WORK(&qat_req->resubmit, qat_comp_resubmit); + adf_misc_wq_queue_work(&qat_req->resubmit); + return; + } + } + + if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK)) + goto end; + + if (qat_req->dir == COMPRESSION) { + cnv = qat_comp_get_cmp_cnv_flag(resp); + if (unlikely(!cnv)) { + dev_err(&GET_DEV(accel_dev), + "Verified compression not supported\n"); + goto end; + } + + if (unlikely(produced > qat_req->actual_dlen)) { + memset(inst->dc_data->ovf_buff, 0, + inst->dc_data->ovf_buff_sz); + dev_dbg(&GET_DEV(accel_dev), + "Actual buffer overflow: produced=%d, dlen=%d\n", + produced, qat_req->actual_dlen); + goto end; + } + } + + res = 0; + areq->dlen = produced; + +end: + qat_bl_free_bufl(accel_dev, &qat_req->buf); + areq->base.complete(&areq->base, res); +} + +void qat_comp_alg_callback(void *resp) +{ + struct qat_compression_req *qat_req = + (void *)(__force long)qat_comp_get_opaque(resp); + struct qat_instance_backlog *backlog = qat_req->alg_req.backlog; + + qat_comp_generic_callback(qat_req, resp); + + qat_alg_send_backlog(backlog); +} + +static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm) +{ + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); + struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct qat_compression_instance *inst; + int node; + + if (tfm->node == NUMA_NO_NODE) + node = numa_node_id(); + else + node = tfm->node; + + memset(ctx, 0, sizeof(*ctx)); + inst = qat_compression_get_instance_node(node); + if (!inst) + return -EINVAL; + ctx->inst = inst; + + ctx->inst->build_deflate_ctx(ctx->comp_ctx); + + return 0; +} + +static void qat_comp_alg_exit_tfm(struct crypto_acomp *acomp_tfm) +{ + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); + struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); + + qat_compression_put_instance(ctx->inst); + memset(ctx, 0, sizeof(*ctx)); +} + +static int qat_comp_alg_compress_decompress(struct acomp_req *areq, + enum direction dir) +{ + struct qat_compression_req *qat_req = acomp_request_ctx(areq); + struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(areq); + struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm); + struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm); + struct qat_compression_instance *inst = ctx->inst; + struct qat_sgl_to_bufl_params *p_params = NULL; + gfp_t f = qat_algs_alloc_flags(&areq->base); + struct qat_sgl_to_bufl_params params; + unsigned int slen = areq->slen; + unsigned int dlen = areq->dlen; + dma_addr_t sfbuf, dfbuf; + u8 *req = qat_req->req; + size_t ovf_buff_sz; + int ret; + + if (!areq->src || !slen) + return -EINVAL; + + if (areq->dst && !dlen) + return -EINVAL; + + qat_req->dst.is_null = false; + + /* Handle acomp requests that require the allocation of a destination + * buffer. The size of the destination buffer is double the source + * buffer (rounded up to the size of a page) to fit the decompressed + * output or an expansion on the data for compression. + */ + if (!areq->dst) { + qat_req->dst.is_null = true; + + dlen = round_up(2 * slen, PAGE_SIZE); + areq->dst = sgl_alloc(dlen, f, NULL); + if (!areq->dst) + return -ENOMEM; + + areq->dlen = dlen; + qat_req->dst.resubmitted = false; + } + + if (dir == COMPRESSION) { + params.extra_dst_buff = inst->dc_data->ovf_buff_p; + ovf_buff_sz = inst->dc_data->ovf_buff_sz; + params.sz_extra_dst_buff = ovf_buff_sz; + p_params = ¶ms; + } + + ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst, + &qat_req->buf, p_params, f); + if (unlikely(ret)) + return ret; + + sfbuf = qat_req->buf.blp; + dfbuf = qat_req->buf.bloutp; + qat_req->qat_compression_ctx = ctx; + qat_req->acompress_req = areq; + qat_req->dir = dir; + + if (dir == COMPRESSION) { + qat_req->actual_dlen = dlen; + dlen += ovf_buff_sz; + qat_comp_create_compression_req(ctx->comp_ctx, req, + (u64)(__force long)sfbuf, slen, + (u64)(__force long)dfbuf, dlen, + (u64)(__force long)qat_req); + } else { + qat_comp_create_decompression_req(ctx->comp_ctx, req, + (u64)(__force long)sfbuf, slen, + (u64)(__force long)dfbuf, dlen, + (u64)(__force long)qat_req); + } + + ret = qat_alg_send_dc_message(qat_req, inst, &areq->base); + if (ret == -ENOSPC) + qat_bl_free_bufl(inst->accel_dev, &qat_req->buf); + + return ret; +} + +static int qat_comp_alg_compress(struct acomp_req *req) +{ + return qat_comp_alg_compress_decompress(req, COMPRESSION); +} + +static int qat_comp_alg_decompress(struct acomp_req *req) +{ + return qat_comp_alg_compress_decompress(req, DECOMPRESSION); +} + +static struct acomp_alg qat_acomp[] = { { + .base = { + .cra_name = "deflate", + .cra_driver_name = "qat_deflate", + .cra_priority = 4001, + .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY, + .cra_ctxsize = sizeof(struct qat_compression_ctx), + .cra_module = THIS_MODULE, + }, + .init = qat_comp_alg_init_tfm, + .exit = qat_comp_alg_exit_tfm, + .compress = qat_comp_alg_compress, + .decompress = qat_comp_alg_decompress, + .dst_free = sgl_free, + .reqsize = sizeof(struct qat_compression_req), +} }; + +int qat_comp_algs_register(void) +{ + int ret = 0; + + mutex_lock(&algs_lock); + if (++active_devs == 1) + ret = crypto_register_acomps(qat_acomp, ARRAY_SIZE(qat_acomp)); + mutex_unlock(&algs_lock); + return ret; +} + +void qat_comp_algs_unregister(void) +{ + mutex_lock(&algs_lock); + if (--active_devs == 0) + crypto_unregister_acomps(qat_acomp, ARRAY_SIZE(qat_acomp)); + mutex_unlock(&algs_lock); +} diff --git a/drivers/crypto/qat/qat_common/qat_comp_req.h b/drivers/crypto/qat/qat_common/qat_comp_req.h new file mode 100644 index 0000000000000000000000000000000000000000..404e32c5e77838df5dc94b22d33fb3b2e3ef0462 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_comp_req.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _QAT_COMP_REQ_H_ +#define _QAT_COMP_REQ_H_ + +#include "icp_qat_fw_comp.h" + +#define QAT_COMP_REQ_SIZE (sizeof(struct icp_qat_fw_comp_req)) +#define QAT_COMP_CTX_SIZE (QAT_COMP_REQ_SIZE * 2) + +static inline void qat_comp_create_req(void *ctx, void *req, u64 src, u32 slen, + u64 dst, u32 dlen, u64 opaque) +{ + struct icp_qat_fw_comp_req *fw_tmpl = ctx; + struct icp_qat_fw_comp_req *fw_req = req; + struct icp_qat_fw_comp_req_params *req_pars = &fw_req->comp_pars; + + memcpy(fw_req, fw_tmpl, sizeof(*fw_req)); + fw_req->comn_mid.src_data_addr = src; + fw_req->comn_mid.src_length = slen; + fw_req->comn_mid.dest_data_addr = dst; + fw_req->comn_mid.dst_length = dlen; + fw_req->comn_mid.opaque_data = opaque; + req_pars->comp_len = slen; + req_pars->out_buffer_sz = dlen; +} + +static inline void qat_comp_override_dst(void *req, u64 dst, u32 dlen) +{ + struct icp_qat_fw_comp_req *fw_req = req; + struct icp_qat_fw_comp_req_params *req_pars = &fw_req->comp_pars; + + fw_req->comn_mid.dest_data_addr = dst; + fw_req->comn_mid.dst_length = dlen; + req_pars->out_buffer_sz = dlen; +} + +static inline void qat_comp_create_compression_req(void *ctx, void *req, + u64 src, u32 slen, + u64 dst, u32 dlen, + u64 opaque) +{ + qat_comp_create_req(ctx, req, src, slen, dst, dlen, opaque); +} + +static inline void qat_comp_create_decompression_req(void *ctx, void *req, + u64 src, u32 slen, + u64 dst, u32 dlen, + u64 opaque) +{ + struct icp_qat_fw_comp_req *fw_tmpl = ctx; + + fw_tmpl++; + qat_comp_create_req(fw_tmpl, req, src, slen, dst, dlen, opaque); +} + +static inline u32 qat_comp_get_consumed_ctr(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comp_resp_pars.input_byte_counter; +} + +static inline u32 qat_comp_get_produced_ctr(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comp_resp_pars.output_byte_counter; +} + +static inline u32 qat_comp_get_produced_adler32(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comp_resp_pars.crc.legacy.curr_adler_32; +} + +static inline u64 qat_comp_get_opaque(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->opaque_data; +} + +static inline s8 qat_comp_get_cmp_err(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comn_resp.comn_error.cmp_err_code; +} + +static inline s8 qat_comp_get_xlt_err(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + + return qat_resp->comn_resp.comn_error.xlat_err_code; +} + +static inline s8 qat_comp_get_cmp_status(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + u8 stat_filed = qat_resp->comn_resp.comn_status; + + return ICP_QAT_FW_COMN_RESP_CMP_STAT_GET(stat_filed); +} + +static inline s8 qat_comp_get_xlt_status(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + u8 stat_filed = qat_resp->comn_resp.comn_status; + + return ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET(stat_filed); +} + +static inline u8 qat_comp_get_cmp_cnv_flag(void *resp) +{ + struct icp_qat_fw_comp_resp *qat_resp = resp; + u8 flags = qat_resp->comn_resp.hdr_flags; + + return ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(flags); +} + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_compression.c b/drivers/crypto/qat/qat_common/qat_compression.c new file mode 100644 index 0000000000000000000000000000000000000000..9fd10f4242f8468460b1bc31f5336362a72b395b --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_compression.c @@ -0,0 +1,297 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2022 Intel Corporation */ +#include +#include +#include "adf_accel_devices.h" +#include "adf_common_drv.h" +#include "adf_transport.h" +#include "adf_transport_access_macros.h" +#include "adf_cfg.h" +#include "adf_cfg_strings.h" +#include "qat_compression.h" +#include "icp_qat_fw.h" + +#define SEC ADF_KERNEL_SEC + +static struct service_hndl qat_compression; + +void qat_compression_put_instance(struct qat_compression_instance *inst) +{ + atomic_dec(&inst->refctr); + adf_dev_put(inst->accel_dev); +} + +static int qat_compression_free_instances(struct adf_accel_dev *accel_dev) +{ + struct qat_compression_instance *inst; + struct list_head *list_ptr, *tmp; + int i; + + list_for_each_safe(list_ptr, tmp, &accel_dev->compression_list) { + inst = list_entry(list_ptr, + struct qat_compression_instance, list); + + for (i = 0; i < atomic_read(&inst->refctr); i++) + qat_compression_put_instance(inst); + + if (inst->dc_tx) + adf_remove_ring(inst->dc_tx); + + if (inst->dc_rx) + adf_remove_ring(inst->dc_rx); + + list_del(list_ptr); + kfree(inst); + } + return 0; +} + +struct qat_compression_instance *qat_compression_get_instance_node(int node) +{ + struct qat_compression_instance *inst = NULL; + struct adf_accel_dev *accel_dev = NULL; + unsigned long best = ~0; + struct list_head *itr; + + list_for_each(itr, adf_devmgr_get_head()) { + struct adf_accel_dev *tmp_dev; + unsigned long ctr; + int tmp_dev_node; + + tmp_dev = list_entry(itr, struct adf_accel_dev, list); + tmp_dev_node = dev_to_node(&GET_DEV(tmp_dev)); + + if ((node == tmp_dev_node || tmp_dev_node < 0) && + adf_dev_started(tmp_dev) && !list_empty(&tmp_dev->compression_list)) { + ctr = atomic_read(&tmp_dev->ref_count); + if (best > ctr) { + accel_dev = tmp_dev; + best = ctr; + } + } + } + + if (!accel_dev) { + pr_info("QAT: Could not find a device on node %d\n", node); + /* Get any started device */ + list_for_each(itr, adf_devmgr_get_head()) { + struct adf_accel_dev *tmp_dev; + + tmp_dev = list_entry(itr, struct adf_accel_dev, list); + if (adf_dev_started(tmp_dev) && + !list_empty(&tmp_dev->compression_list)) { + accel_dev = tmp_dev; + break; + } + } + } + + if (!accel_dev) + return NULL; + + best = ~0; + list_for_each(itr, &accel_dev->compression_list) { + struct qat_compression_instance *tmp_inst; + unsigned long ctr; + + tmp_inst = list_entry(itr, struct qat_compression_instance, list); + ctr = atomic_read(&tmp_inst->refctr); + if (best > ctr) { + inst = tmp_inst; + best = ctr; + } + } + if (inst) { + if (adf_dev_get(accel_dev)) { + dev_err(&GET_DEV(accel_dev), "Could not increment dev refctr\n"); + return NULL; + } + atomic_inc(&inst->refctr); + } + return inst; +} + +static int qat_compression_create_instances(struct adf_accel_dev *accel_dev) +{ + struct qat_compression_instance *inst; + char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; + char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES]; + unsigned long num_inst, num_msg_dc; + unsigned long bank; + int msg_size; + int ret; + int i; + + INIT_LIST_HEAD(&accel_dev->compression_list); + strscpy(key, ADF_NUM_DC, sizeof(key)); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) + return ret; + + ret = kstrtoul(val, 10, &num_inst); + if (ret) + return ret; + + for (i = 0; i < num_inst; i++) { + inst = kzalloc_node(sizeof(*inst), GFP_KERNEL, + dev_to_node(&GET_DEV(accel_dev))); + if (!inst) { + ret = -ENOMEM; + goto err; + } + + list_add_tail(&inst->list, &accel_dev->compression_list); + inst->id = i; + atomic_set(&inst->refctr, 0); + inst->accel_dev = accel_dev; + inst->build_deflate_ctx = GET_DC_OPS(accel_dev)->build_deflate_ctx; + + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) + return ret; + + ret = kstrtoul(val, 10, &bank); + if (ret) + return ret; + + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i); + ret = adf_cfg_get_param_value(accel_dev, SEC, key, val); + if (ret) + return ret; + + ret = kstrtoul(val, 10, &num_msg_dc); + if (ret) + return ret; + + msg_size = ICP_QAT_FW_REQ_DEFAULT_SZ; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i); + ret = adf_create_ring(accel_dev, SEC, bank, num_msg_dc, + msg_size, key, NULL, 0, &inst->dc_tx); + if (ret) + return ret; + + msg_size = ICP_QAT_FW_RESP_DEFAULT_SZ; + snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i); + ret = adf_create_ring(accel_dev, SEC, bank, num_msg_dc, + msg_size, key, qat_comp_alg_callback, 0, + &inst->dc_rx); + if (ret) + return ret; + + inst->dc_data = accel_dev->dc_data; + INIT_LIST_HEAD(&inst->backlog.list); + spin_lock_init(&inst->backlog.lock); + } + return 0; +err: + qat_compression_free_instances(accel_dev); + return ret; +} + +static int qat_compression_alloc_dc_data(struct adf_accel_dev *accel_dev) +{ + struct device *dev = &GET_DEV(accel_dev); + dma_addr_t obuff_p = DMA_MAPPING_ERROR; + size_t ovf_buff_sz = QAT_COMP_MAX_SKID; + struct adf_dc_data *dc_data = NULL; + u8 *obuff = NULL; + + dc_data = devm_kzalloc(dev, sizeof(*dc_data), GFP_KERNEL); + if (!dc_data) + goto err; + + obuff = kzalloc_node(ovf_buff_sz, GFP_KERNEL, dev_to_node(dev)); + if (!obuff) + goto err; + + obuff_p = dma_map_single(dev, obuff, ovf_buff_sz, DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(dev, obuff_p))) + goto err; + + dc_data->ovf_buff = obuff; + dc_data->ovf_buff_p = obuff_p; + dc_data->ovf_buff_sz = ovf_buff_sz; + + accel_dev->dc_data = dc_data; + + return 0; + +err: + accel_dev->dc_data = NULL; + kfree(obuff); + devm_kfree(dev, dc_data); + return -ENOMEM; +} + +static void qat_free_dc_data(struct adf_accel_dev *accel_dev) +{ + struct adf_dc_data *dc_data = accel_dev->dc_data; + struct device *dev = &GET_DEV(accel_dev); + + if (!dc_data) + return; + + dma_unmap_single(dev, dc_data->ovf_buff_p, dc_data->ovf_buff_sz, + DMA_FROM_DEVICE); + memset(dc_data->ovf_buff, 0, dc_data->ovf_buff_sz); + kfree(dc_data->ovf_buff); + devm_kfree(dev, dc_data); + accel_dev->dc_data = NULL; +} + +static int qat_compression_init(struct adf_accel_dev *accel_dev) +{ + int ret; + + ret = qat_compression_alloc_dc_data(accel_dev); + if (ret) + return ret; + + ret = qat_compression_create_instances(accel_dev); + if (ret) + qat_free_dc_data(accel_dev); + + return ret; +} + +static int qat_compression_shutdown(struct adf_accel_dev *accel_dev) +{ + qat_free_dc_data(accel_dev); + return qat_compression_free_instances(accel_dev); +} + +static int qat_compression_event_handler(struct adf_accel_dev *accel_dev, + enum adf_event event) +{ + int ret; + + switch (event) { + case ADF_EVENT_INIT: + ret = qat_compression_init(accel_dev); + break; + case ADF_EVENT_SHUTDOWN: + ret = qat_compression_shutdown(accel_dev); + break; + case ADF_EVENT_RESTARTING: + case ADF_EVENT_RESTARTED: + case ADF_EVENT_START: + case ADF_EVENT_STOP: + default: + ret = 0; + } + return ret; +} + +int qat_compression_register(void) +{ + memset(&qat_compression, 0, sizeof(qat_compression)); + qat_compression.event_hld = qat_compression_event_handler; + qat_compression.name = "qat_compression"; + return adf_service_register(&qat_compression); +} + +int qat_compression_unregister(void) +{ + return adf_service_unregister(&qat_compression); +} diff --git a/drivers/crypto/qat/qat_common/qat_compression.h b/drivers/crypto/qat/qat_common/qat_compression.h new file mode 100644 index 0000000000000000000000000000000000000000..aebac2302dcf23a9af5e1eafdb55c50c8959e8e8 --- /dev/null +++ b/drivers/crypto/qat/qat_common/qat_compression.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright(c) 2022 Intel Corporation */ +#ifndef _QAT_COMPRESSION_H_ +#define _QAT_COMPRESSION_H_ + +#include +#include +#include "adf_accel_devices.h" +#include "qat_algs_send.h" + +#define QAT_COMP_MAX_SKID 4096 + +struct qat_compression_instance { + struct adf_etr_ring_data *dc_tx; + struct adf_etr_ring_data *dc_rx; + struct adf_accel_dev *accel_dev; + struct list_head list; + unsigned long state; + int id; + atomic_t refctr; + struct qat_instance_backlog backlog; + struct adf_dc_data *dc_data; + void (*build_deflate_ctx)(void *ctx); +}; + +static inline bool adf_hw_dev_has_compression(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + u32 mask = ~hw_device->accel_capabilities_mask; + + if (mask & ADF_ACCEL_CAPABILITIES_COMPRESSION) + return false; + + return true; +} + +#endif diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c index 9341d892533a7ebc57a23a1a0461ec126890dc65..e31199eade5b6f97587e59d99f0b2debd664b9fd 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.c +++ b/drivers/crypto/qat/qat_common/qat_crypto.c @@ -5,7 +5,6 @@ #include "adf_accel_devices.h" #include "adf_common_drv.h" #include "adf_transport.h" -#include "adf_transport_access_macros.h" #include "adf_cfg.h" #include "adf_cfg_strings.h" #include "adf_gen2_hw_data.h" @@ -126,126 +125,9 @@ int qat_crypto_vf_dev_config(struct adf_accel_dev *accel_dev) return -EFAULT; } - return qat_crypto_dev_config(accel_dev); + return GET_HW_DATA(accel_dev)->dev_config(accel_dev); } -/** - * qat_crypto_dev_config() - create dev config required to create crypto inst. - * - * @accel_dev: Pointer to acceleration device. - * - * Function creates device configuration required to create crypto instances - * - * Return: 0 on success, error code otherwise. - */ -int qat_crypto_dev_config(struct adf_accel_dev *accel_dev) -{ - char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES]; - int banks = GET_MAX_BANKS(accel_dev); - int cpus = num_online_cpus(); - unsigned long val; - int instances; - int ret; - int i; - - if (adf_hw_dev_has_crypto(accel_dev)) - instances = min(cpus, banks); - else - instances = 0; - - ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC); - if (ret) - goto err; - - ret = adf_cfg_section_add(accel_dev, "Accelerator0"); - if (ret) - goto err; - - for (i = 0; i < instances; i++) { - val = i; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY, - i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i); - val = 128; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 512; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 0; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 2; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 8; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = 10; - snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i); - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, - key, &val, ADF_DEC); - if (ret) - goto err; - - val = ADF_COALESCING_DEF_TIME; - snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i); - ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0", - key, &val, ADF_DEC); - if (ret) - goto err; - } - - val = i; - ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY, - &val, ADF_DEC); - if (ret) - goto err; - - set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status); - return 0; -err: - dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n"); - return ret; -} -EXPORT_SYMBOL_GPL(qat_crypto_dev_config); - static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev) { unsigned long num_inst, num_msg_sym, num_msg_asym; diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h index df3c738ce323a99440c3b65fc759df17e63cad03..6a0e961bb9dc774583e58539d9bf8476d6b647c6 100644 --- a/drivers/crypto/qat/qat_common/qat_crypto.h +++ b/drivers/crypto/qat/qat_common/qat_crypto.h @@ -8,19 +8,8 @@ #include #include "adf_accel_devices.h" #include "icp_qat_fw_la.h" - -struct qat_instance_backlog { - struct list_head list; - spinlock_t lock; /* protects backlog list */ -}; - -struct qat_alg_req { - u32 *fw_req; - struct adf_etr_ring_data *tx_ring; - struct crypto_async_request *base; - struct list_head list; - struct qat_instance_backlog *backlog; -}; +#include "qat_algs_send.h" +#include "qat_bl.h" struct qat_crypto_instance { struct adf_etr_ring_data *sym_tx; @@ -35,39 +24,6 @@ struct qat_crypto_instance { struct qat_instance_backlog backlog; }; -#define QAT_MAX_BUFF_DESC 4 - -struct qat_alg_buf { - u32 len; - u32 resrvd; - u64 addr; -} __packed; - -struct qat_alg_buf_list { - u64 resrvd; - u32 num_bufs; - u32 num_mapped_bufs; - struct qat_alg_buf bufers[]; -} __packed; - -struct qat_alg_fixed_buf_list { - struct qat_alg_buf_list sgl_hdr; - struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC]; -} __packed __aligned(64); - -struct qat_crypto_request_buffs { - struct qat_alg_buf_list *bl; - dma_addr_t blp; - struct qat_alg_buf_list *blout; - dma_addr_t bloutp; - size_t sz; - size_t sz_out; - bool sgl_src_valid; - bool sgl_dst_valid; - struct qat_alg_fixed_buf_list sgl_src; - struct qat_alg_fixed_buf_list sgl_dst; -}; - struct qat_crypto_request; struct qat_crypto_request { @@ -80,7 +36,7 @@ struct qat_crypto_request { struct aead_request *aead_req; struct skcipher_request *skcipher_req; }; - struct qat_crypto_request_buffs buf; + struct qat_request_buffs buf; void (*cb)(struct icp_qat_fw_la_resp *resp, struct qat_crypto_request *req); union { @@ -109,9 +65,4 @@ static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev) return true; } -static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req) -{ - return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; -} - #endif diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index cb3bdd3618fb0e72701d7dc87272462a8a75a587..bc80bb475118d36dc8565bf4ca135d6d94e54ba6 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -2,6 +2,8 @@ /* Copyright(c) 2014 - 2021 Intel Corporation */ #include #include +#include +#include #include #include #include "adf_dh895xcc_hw_data.h" @@ -234,12 +236,14 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->enable_ints = adf_gen2_enable_ints; hw_data->reset_device = adf_reset_sbr; hw_data->disable_iov = adf_disable_sriov; + hw_data->dev_config = adf_gen2_dev_config; adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops); hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts; hw_data->pfvf_ops.disable_all_vf2pf_interrupts = disable_all_vf2pf_interrupts; hw_data->pfvf_ops.disable_pending_vf2pf_interrupts = disable_pending_vf2pf_interrupts; adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c index acca56752aa02dec61fcc901ace74edecdf63c32..ebeb17b67fcd58d89f7ab569ab4491f23faf8e99 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c @@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto out_err_disable_aer; } - ret = qat_crypto_dev_config(accel_dev); + ret = hw_data->dev_config(accel_dev); if (ret) goto out_err_disable_aer; diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c index 31c14d7e1c115d00414388df60ab6a8605ba1e09..70e56cc16ecebb761f50961b64d47cbef0b8241e 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c @@ -2,6 +2,8 @@ /* Copyright(c) 2015 - 2021 Intel Corporation */ #include #include +#include +#include #include #include #include @@ -86,9 +88,11 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) hw_data->get_sku = get_sku; hw_data->enable_ints = adf_vf_void_noop; hw_data->dev_class->instances++; + hw_data->dev_config = adf_gen2_dev_config; adf_devmgr_update_class_index(hw_data); adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops); adf_gen2_init_hw_csr_ops(&hw_data->csr_ops); + adf_gen2_init_dc_ops(&hw_data->dc_ops); } void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data) diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c index 18756b2e1c91276a18147f3dd320e47f86f48636..c1485e702b3eb5ab2e00cb8a55a6b36498e38369 100644 --- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c +++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c @@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (ret) goto out_err_dev_shutdown; - set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status); - ret = adf_dev_start(accel_dev); if (ret) goto out_err_dev_stop; diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c index 6eb4d2e356297c1182f88382ff0757aee26fbf90..7d811728f047823c3254c4234ce77ae71719b168 100644 --- a/drivers/crypto/qce/aead.c +++ b/drivers/crypto/qce/aead.c @@ -24,7 +24,7 @@ static void qce_aead_done(void *data) { struct crypto_async_request *async_req = data; struct aead_request *req = aead_request_cast(async_req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; @@ -92,7 +92,7 @@ static void qce_aead_done(void *data) static struct scatterlist * qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; @@ -103,7 +103,7 @@ qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req) static struct scatterlist * qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); sg_init_one(&rctx->result_sg, rctx->ccmresult_buf, QCE_BAM_BURST_SIZE); return qce_sgtable_add(tbl, &rctx->result_sg, QCE_BAM_BURST_SIZE); @@ -112,7 +112,7 @@ qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req) static struct scatterlist * qce_aead_prepare_dst_buf(struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; struct scatterlist *sg, *msg_sg, __sg[2]; @@ -186,7 +186,7 @@ qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req) { struct scatterlist *sg, *msg_sg, __sg[2]; struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); unsigned int assoclen = rctx->assoclen; unsigned int adata_header_len, cryptlen, totallen; @@ -300,7 +300,7 @@ err_free: static int qce_aead_prepare_buf(struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; struct scatterlist *sg; @@ -328,7 +328,7 @@ static int qce_aead_prepare_buf(struct aead_request *req) static int qce_aead_ccm_prepare_buf(struct aead_request *req) { - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); struct scatterlist *sg; @@ -408,7 +408,7 @@ static int qce_aead_async_req_handle(struct crypto_async_request *async_req) { struct aead_request *req = aead_request_cast(async_req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct crypto_aead *tfm = crypto_aead_reqtfm(req); struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); @@ -502,7 +502,7 @@ error_free: static int qce_aead_crypt(struct aead_request *req, int encrypt) { struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm); struct qce_alg_template *tmpl = to_aead_tmpl(tfm); unsigned int blocksize = crypto_aead_blocksize(tfm); @@ -675,8 +675,8 @@ static int qce_aead_init(struct crypto_aead *tfm) if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); - crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx) + - crypto_aead_reqsize(ctx->fallback)); + crypto_aead_set_reqsize_dma(tfm, sizeof(struct qce_aead_reqctx) + + crypto_aead_reqsize(ctx->fallback)); return 0; } diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c index 7c612ba5068f7b31a2740618665a97e69c41bb9c..04253a8d33409a2a51db527435d09ae85a7880af 100644 --- a/drivers/crypto/qce/common.c +++ b/drivers/crypto/qce/common.c @@ -3,6 +3,7 @@ * Copyright (c) 2012-2014, The Linux Foundation. All rights reserved. */ +#include #include #include #include @@ -147,7 +148,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req) { struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *ahash = __crypto_ahash_cast(async_req->tfm); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm); struct qce_device *qce = tmpl->qce; unsigned int digestsize = crypto_ahash_digestsize(ahash); @@ -419,7 +420,7 @@ static unsigned int qce_be32_to_cpu_array(u32 *dst, const u8 *src, unsigned int static int qce_setup_regs_aead(struct crypto_async_request *async_req) { struct aead_request *req = aead_request_cast(async_req); - struct qce_aead_reqctx *rctx = aead_request_ctx(req); + struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req); struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req)); struct qce_device *qce = tmpl->qce; diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c index 37bafd7aeb79df4fb650e5c95e2604798a5cf653..fc72af8aa9a725be7ba59cad71be4123b8dfe97f 100644 --- a/drivers/crypto/qce/sha.c +++ b/drivers/crypto/qce/sha.c @@ -38,7 +38,7 @@ static void qce_ahash_done(void *data) struct crypto_async_request *async_req = data; struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *ahash = crypto_ahash_reqtfm(req); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm); struct qce_device *qce = tmpl->qce; struct qce_result_dump *result = qce->dma.result_buf; @@ -75,7 +75,7 @@ static void qce_ahash_done(void *data) static int qce_ahash_async_req_handle(struct crypto_async_request *async_req) { struct ahash_request *req = ahash_request_cast(async_req); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_sha_ctx *ctx = crypto_tfm_ctx(async_req->tfm); struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm); struct qce_device *qce = tmpl->qce; @@ -132,7 +132,7 @@ error_unmap_src: static int qce_ahash_init(struct ahash_request *req) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); const u32 *std_iv = tmpl->std_iv; @@ -147,7 +147,7 @@ static int qce_ahash_init(struct ahash_request *req) static int qce_ahash_export(struct ahash_request *req, void *out) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_sha_saved_state *export_state = out; memcpy(export_state->pending_buf, rctx->buf, rctx->buflen); @@ -164,7 +164,7 @@ static int qce_ahash_export(struct ahash_request *req, void *out) static int qce_ahash_import(struct ahash_request *req, const void *in) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); const struct qce_sha_saved_state *import_state = in; memset(rctx, 0, sizeof(*rctx)); @@ -183,7 +183,7 @@ static int qce_ahash_import(struct ahash_request *req, const void *in) static int qce_ahash_update(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); struct qce_device *qce = tmpl->qce; struct scatterlist *sg_last, *sg; @@ -275,7 +275,7 @@ static int qce_ahash_update(struct ahash_request *req) static int qce_ahash_final(struct ahash_request *req) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); struct qce_device *qce = tmpl->qce; @@ -302,7 +302,7 @@ static int qce_ahash_final(struct ahash_request *req) static int qce_ahash_digest(struct ahash_request *req) { - struct qce_sha_reqctx *rctx = ahash_request_ctx(req); + struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req); struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm); struct qce_device *qce = tmpl->qce; int ret; @@ -395,7 +395,7 @@ static int qce_ahash_cra_init(struct crypto_tfm *tfm) struct crypto_ahash *ahash = __crypto_ahash_cast(tfm); struct qce_sha_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_ahash_set_reqsize(ahash, sizeof(struct qce_sha_reqctx)); + crypto_ahash_set_reqsize_dma(ahash, sizeof(struct qce_sha_reqctx)); memset(ctx, 0, sizeof(*ctx)); return 0; } diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c index 35d73061d1569b292de14d6a206d832f800e8007..9f6ba770a90a1d8967d3f1469762cf47342ac7f4 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.c +++ b/drivers/crypto/rockchip/rk3288_crypto.c @@ -14,235 +14,162 @@ #include #include #include +#include #include #include #include -static int rk_crypto_enable_clk(struct rk_crypto_info *dev) -{ - int err; - - err = clk_prepare_enable(dev->sclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock sclk\n", - __func__, __LINE__); - goto err_return; - } - err = clk_prepare_enable(dev->aclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock aclk\n", - __func__, __LINE__); - goto err_aclk; - } - err = clk_prepare_enable(dev->hclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock hclk\n", - __func__, __LINE__); - goto err_hclk; - } - err = clk_prepare_enable(dev->dmaclk); - if (err) { - dev_err(dev->dev, "[%s:%d], Couldn't enable clock dmaclk\n", - __func__, __LINE__); - goto err_dmaclk; - } - return err; -err_dmaclk: - clk_disable_unprepare(dev->hclk); -err_hclk: - clk_disable_unprepare(dev->aclk); -err_aclk: - clk_disable_unprepare(dev->sclk); -err_return: - return err; -} +static struct rockchip_ip rocklist = { + .dev_list = LIST_HEAD_INIT(rocklist.dev_list), + .lock = __SPIN_LOCK_UNLOCKED(rocklist.lock), +}; -static void rk_crypto_disable_clk(struct rk_crypto_info *dev) +struct rk_crypto_info *get_rk_crypto(void) { - clk_disable_unprepare(dev->dmaclk); - clk_disable_unprepare(dev->hclk); - clk_disable_unprepare(dev->aclk); - clk_disable_unprepare(dev->sclk); + struct rk_crypto_info *first; + + spin_lock(&rocklist.lock); + first = list_first_entry_or_null(&rocklist.dev_list, + struct rk_crypto_info, list); + list_rotate_left(&rocklist.dev_list); + spin_unlock(&rocklist.lock); + return first; } -static int check_alignment(struct scatterlist *sg_src, - struct scatterlist *sg_dst, - int align_mask) -{ - int in, out, align; +static const struct rk_variant rk3288_variant = { + .num_clks = 4, + .rkclks = { + { "sclk", 150000000}, + } +}; - in = IS_ALIGNED((uint32_t)sg_src->offset, 4) && - IS_ALIGNED((uint32_t)sg_src->length, align_mask); - if (!sg_dst) - return in; - out = IS_ALIGNED((uint32_t)sg_dst->offset, 4) && - IS_ALIGNED((uint32_t)sg_dst->length, align_mask); - align = in && out; +static const struct rk_variant rk3328_variant = { + .num_clks = 3, +}; - return (align && (sg_src->length == sg_dst->length)); -} +static const struct rk_variant rk3399_variant = { + .num_clks = 3, +}; -static int rk_load_data(struct rk_crypto_info *dev, - struct scatterlist *sg_src, - struct scatterlist *sg_dst) +static int rk_crypto_get_clks(struct rk_crypto_info *dev) { - unsigned int count; - - dev->aligned = dev->aligned ? - check_alignment(sg_src, sg_dst, dev->align_size) : - dev->aligned; - if (dev->aligned) { - count = min(dev->left_bytes, sg_src->length); - dev->left_bytes -= count; - - if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) { - dev_err(dev->dev, "[%s:%d] dma_map_sg(src) error\n", - __func__, __LINE__); - return -EINVAL; - } - dev->addr_in = sg_dma_address(sg_src); - - if (sg_dst) { - if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) { - dev_err(dev->dev, - "[%s:%d] dma_map_sg(dst) error\n", - __func__, __LINE__); - dma_unmap_sg(dev->dev, sg_src, 1, - DMA_TO_DEVICE); - return -EINVAL; - } - dev->addr_out = sg_dma_address(sg_dst); - } - } else { - count = (dev->left_bytes > PAGE_SIZE) ? - PAGE_SIZE : dev->left_bytes; - - if (!sg_pcopy_to_buffer(dev->first, dev->src_nents, - dev->addr_vir, count, - dev->total - dev->left_bytes)) { - dev_err(dev->dev, "[%s:%d] pcopy err\n", - __func__, __LINE__); - return -EINVAL; - } - dev->left_bytes -= count; - sg_init_one(&dev->sg_tmp, dev->addr_vir, count); - if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, DMA_TO_DEVICE)) { - dev_err(dev->dev, "[%s:%d] dma_map_sg(sg_tmp) error\n", - __func__, __LINE__); - return -ENOMEM; - } - dev->addr_in = sg_dma_address(&dev->sg_tmp); - - if (sg_dst) { - if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, - DMA_FROM_DEVICE)) { - dev_err(dev->dev, - "[%s:%d] dma_map_sg(sg_tmp) error\n", - __func__, __LINE__); - dma_unmap_sg(dev->dev, &dev->sg_tmp, 1, - DMA_TO_DEVICE); - return -ENOMEM; + int i, j, err; + unsigned long cr; + + dev->num_clks = devm_clk_bulk_get_all(dev->dev, &dev->clks); + if (dev->num_clks < dev->variant->num_clks) { + dev_err(dev->dev, "Missing clocks, got %d instead of %d\n", + dev->num_clks, dev->variant->num_clks); + return -EINVAL; + } + + for (i = 0; i < dev->num_clks; i++) { + cr = clk_get_rate(dev->clks[i].clk); + for (j = 0; j < ARRAY_SIZE(dev->variant->rkclks); j++) { + if (dev->variant->rkclks[j].max == 0) + continue; + if (strcmp(dev->variant->rkclks[j].name, dev->clks[i].id)) + continue; + if (cr > dev->variant->rkclks[j].max) { + err = clk_set_rate(dev->clks[i].clk, + dev->variant->rkclks[j].max); + if (err) + dev_err(dev->dev, "Fail downclocking %s from %lu to %lu\n", + dev->variant->rkclks[j].name, cr, + dev->variant->rkclks[j].max); + else + dev_info(dev->dev, "Downclocking %s from %lu to %lu\n", + dev->variant->rkclks[j].name, cr, + dev->variant->rkclks[j].max); } - dev->addr_out = sg_dma_address(&dev->sg_tmp); } } - dev->count = count; return 0; } -static void rk_unload_data(struct rk_crypto_info *dev) +static int rk_crypto_enable_clk(struct rk_crypto_info *dev) { - struct scatterlist *sg_in, *sg_out; + int err; - sg_in = dev->aligned ? dev->sg_src : &dev->sg_tmp; - dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE); + err = clk_bulk_prepare_enable(dev->num_clks, dev->clks); + if (err) + dev_err(dev->dev, "Could not enable clock clks\n"); - if (dev->sg_dst) { - sg_out = dev->aligned ? dev->sg_dst : &dev->sg_tmp; - dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE); - } + return err; } -static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) +static void rk_crypto_disable_clk(struct rk_crypto_info *dev) { - struct rk_crypto_info *dev = platform_get_drvdata(dev_id); - u32 interrupt_status; + clk_bulk_disable_unprepare(dev->num_clks, dev->clks); +} - spin_lock(&dev->lock); - interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS); - CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status); +/* + * Power management strategy: The device is suspended until a request + * is handled. For avoiding suspend/resume yoyo, the autosuspend is set to 2s. + */ +static int rk_crypto_pm_suspend(struct device *dev) +{ + struct rk_crypto_info *rkdev = dev_get_drvdata(dev); - if (interrupt_status & 0x0a) { - dev_warn(dev->dev, "DMA Error\n"); - dev->err = -EFAULT; - } - tasklet_schedule(&dev->done_task); + rk_crypto_disable_clk(rkdev); + reset_control_assert(rkdev->rst); - spin_unlock(&dev->lock); - return IRQ_HANDLED; + return 0; } -static int rk_crypto_enqueue(struct rk_crypto_info *dev, - struct crypto_async_request *async_req) +static int rk_crypto_pm_resume(struct device *dev) { - unsigned long flags; + struct rk_crypto_info *rkdev = dev_get_drvdata(dev); int ret; - spin_lock_irqsave(&dev->lock, flags); - ret = crypto_enqueue_request(&dev->queue, async_req); - if (dev->busy) { - spin_unlock_irqrestore(&dev->lock, flags); + ret = rk_crypto_enable_clk(rkdev); + if (ret) return ret; - } - dev->busy = true; - spin_unlock_irqrestore(&dev->lock, flags); - tasklet_schedule(&dev->queue_task); - return ret; -} + reset_control_deassert(rkdev->rst); + return 0; -static void rk_crypto_queue_task_cb(unsigned long data) -{ - struct rk_crypto_info *dev = (struct rk_crypto_info *)data; - struct crypto_async_request *async_req, *backlog; - unsigned long flags; - int err = 0; +} - dev->err = 0; - spin_lock_irqsave(&dev->lock, flags); - backlog = crypto_get_backlog(&dev->queue); - async_req = crypto_dequeue_request(&dev->queue); +static const struct dev_pm_ops rk_crypto_pm_ops = { + SET_RUNTIME_PM_OPS(rk_crypto_pm_suspend, rk_crypto_pm_resume, NULL) +}; - if (!async_req) { - dev->busy = false; - spin_unlock_irqrestore(&dev->lock, flags); - return; - } - spin_unlock_irqrestore(&dev->lock, flags); +static int rk_crypto_pm_init(struct rk_crypto_info *rkdev) +{ + int err; - if (backlog) { - backlog->complete(backlog, -EINPROGRESS); - backlog = NULL; - } + pm_runtime_use_autosuspend(rkdev->dev); + pm_runtime_set_autosuspend_delay(rkdev->dev, 2000); - dev->async_req = async_req; - err = dev->start(dev); + err = pm_runtime_set_suspended(rkdev->dev); if (err) - dev->complete(dev->async_req, err); + return err; + pm_runtime_enable(rkdev->dev); + return err; } -static void rk_crypto_done_task_cb(unsigned long data) +static void rk_crypto_pm_exit(struct rk_crypto_info *rkdev) { - struct rk_crypto_info *dev = (struct rk_crypto_info *)data; + pm_runtime_disable(rkdev->dev); +} - if (dev->err) { - dev->complete(dev->async_req, dev->err); - return; +static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id) +{ + struct rk_crypto_info *dev = platform_get_drvdata(dev_id); + u32 interrupt_status; + + interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS); + CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status); + + dev->status = 1; + if (interrupt_status & 0x0a) { + dev_warn(dev->dev, "DMA Error\n"); + dev->status = 0; } + complete(&dev->complete); - dev->err = dev->update(dev); - if (dev->err) - dev->complete(dev->async_req, dev->err); + return IRQ_HANDLED; } static struct rk_crypto_tmp *rk_cipher_algs[] = { @@ -257,6 +184,62 @@ static struct rk_crypto_tmp *rk_cipher_algs[] = { &rk_ahash_md5, }; +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG +static int rk_crypto_debugfs_show(struct seq_file *seq, void *v) +{ + struct rk_crypto_info *dd; + unsigned int i; + + spin_lock(&rocklist.lock); + list_for_each_entry(dd, &rocklist.dev_list, list) { + seq_printf(seq, "%s %s requests: %lu\n", + dev_driver_string(dd->dev), dev_name(dd->dev), + dd->nreq); + } + spin_unlock(&rocklist.lock); + + for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { + if (!rk_cipher_algs[i]->dev) + continue; + switch (rk_cipher_algs[i]->type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", + rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name, + rk_cipher_algs[i]->alg.skcipher.base.cra_name, + rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); + seq_printf(seq, "\tfallback due to length: %lu\n", + rk_cipher_algs[i]->stat_fb_len); + seq_printf(seq, "\tfallback due to alignment: %lu\n", + rk_cipher_algs[i]->stat_fb_align); + seq_printf(seq, "\tfallback due to SGs: %lu\n", + rk_cipher_algs[i]->stat_fb_sgdiff); + break; + case CRYPTO_ALG_TYPE_AHASH: + seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n", + rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name, + rk_cipher_algs[i]->alg.hash.halg.base.cra_name, + rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb); + break; + } + } + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs); +#endif + +static void register_debugfs(struct rk_crypto_info *crypto_info) +{ +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG + /* Ignore error of debugfs */ + rocklist.dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL); + rocklist.dbgfs_stats = debugfs_create_file("stats", 0444, + rocklist.dbgfs_dir, + &rocklist, + &rk_crypto_debugfs_fops); +#endif +} + static int rk_crypto_register(struct rk_crypto_info *crypto_info) { unsigned int i, k; @@ -264,12 +247,22 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info) for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { rk_cipher_algs[i]->dev = crypto_info; - if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER) - err = crypto_register_skcipher( - &rk_cipher_algs[i]->alg.skcipher); - else - err = crypto_register_ahash( - &rk_cipher_algs[i]->alg.hash); + switch (rk_cipher_algs[i]->type) { + case CRYPTO_ALG_TYPE_SKCIPHER: + dev_info(crypto_info->dev, "Register %s as %s\n", + rk_cipher_algs[i]->alg.skcipher.base.cra_name, + rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name); + err = crypto_register_skcipher(&rk_cipher_algs[i]->alg.skcipher); + break; + case CRYPTO_ALG_TYPE_AHASH: + dev_info(crypto_info->dev, "Register %s as %s\n", + rk_cipher_algs[i]->alg.hash.halg.base.cra_name, + rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name); + err = crypto_register_ahash(&rk_cipher_algs[i]->alg.hash); + break; + default: + dev_err(crypto_info->dev, "unknown algorithm\n"); + } if (err) goto err_cipher_algs; } @@ -277,7 +270,7 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info) err_cipher_algs: for (k = 0; k < i; k++) { - if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER) + if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) crypto_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher); else crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash); @@ -290,22 +283,23 @@ static void rk_crypto_unregister(void) unsigned int i; for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) { - if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER) + if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER) crypto_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher); else crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash); } } -static void rk_crypto_action(void *data) -{ - struct rk_crypto_info *crypto_info = data; - - reset_control_assert(crypto_info->rst); -} - static const struct of_device_id crypto_of_id_table[] = { - { .compatible = "rockchip,rk3288-crypto" }, + { .compatible = "rockchip,rk3288-crypto", + .data = &rk3288_variant, + }, + { .compatible = "rockchip,rk3328-crypto", + .data = &rk3328_variant, + }, + { .compatible = "rockchip,rk3399-crypto", + .data = &rk3399_variant, + }, {} }; MODULE_DEVICE_TABLE(of, crypto_of_id_table); @@ -313,7 +307,7 @@ MODULE_DEVICE_TABLE(of, crypto_of_id_table); static int rk_crypto_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct rk_crypto_info *crypto_info; + struct rk_crypto_info *crypto_info, *first; int err = 0; crypto_info = devm_kzalloc(&pdev->dev, @@ -323,7 +317,16 @@ static int rk_crypto_probe(struct platform_device *pdev) goto err_crypto; } - crypto_info->rst = devm_reset_control_get(dev, "crypto-rst"); + crypto_info->dev = &pdev->dev; + platform_set_drvdata(pdev, crypto_info); + + crypto_info->variant = of_device_get_match_data(&pdev->dev); + if (!crypto_info->variant) { + dev_err(&pdev->dev, "Missing variant\n"); + return -EINVAL; + } + + crypto_info->rst = devm_reset_control_array_get_exclusive(dev); if (IS_ERR(crypto_info->rst)) { err = PTR_ERR(crypto_info->rst); goto err_crypto; @@ -333,46 +336,18 @@ static int rk_crypto_probe(struct platform_device *pdev) usleep_range(10, 20); reset_control_deassert(crypto_info->rst); - err = devm_add_action_or_reset(dev, rk_crypto_action, crypto_info); - if (err) - goto err_crypto; - - spin_lock_init(&crypto_info->lock); - crypto_info->reg = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(crypto_info->reg)) { err = PTR_ERR(crypto_info->reg); goto err_crypto; } - crypto_info->aclk = devm_clk_get(&pdev->dev, "aclk"); - if (IS_ERR(crypto_info->aclk)) { - err = PTR_ERR(crypto_info->aclk); - goto err_crypto; - } - - crypto_info->hclk = devm_clk_get(&pdev->dev, "hclk"); - if (IS_ERR(crypto_info->hclk)) { - err = PTR_ERR(crypto_info->hclk); - goto err_crypto; - } - - crypto_info->sclk = devm_clk_get(&pdev->dev, "sclk"); - if (IS_ERR(crypto_info->sclk)) { - err = PTR_ERR(crypto_info->sclk); - goto err_crypto; - } - - crypto_info->dmaclk = devm_clk_get(&pdev->dev, "apb_pclk"); - if (IS_ERR(crypto_info->dmaclk)) { - err = PTR_ERR(crypto_info->dmaclk); + err = rk_crypto_get_clks(crypto_info); + if (err) goto err_crypto; - } crypto_info->irq = platform_get_irq(pdev, 0); if (crypto_info->irq < 0) { - dev_warn(crypto_info->dev, - "control Interrupt is not available.\n"); err = crypto_info->irq; goto err_crypto; } @@ -382,49 +357,64 @@ static int rk_crypto_probe(struct platform_device *pdev) "rk-crypto", pdev); if (err) { - dev_err(crypto_info->dev, "irq request failed.\n"); + dev_err(&pdev->dev, "irq request failed.\n"); goto err_crypto; } - crypto_info->dev = &pdev->dev; - platform_set_drvdata(pdev, crypto_info); - - tasklet_init(&crypto_info->queue_task, - rk_crypto_queue_task_cb, (unsigned long)crypto_info); - tasklet_init(&crypto_info->done_task, - rk_crypto_done_task_cb, (unsigned long)crypto_info); - crypto_init_queue(&crypto_info->queue, 50); + crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true); + crypto_engine_start(crypto_info->engine); + init_completion(&crypto_info->complete); - crypto_info->enable_clk = rk_crypto_enable_clk; - crypto_info->disable_clk = rk_crypto_disable_clk; - crypto_info->load_data = rk_load_data; - crypto_info->unload_data = rk_unload_data; - crypto_info->enqueue = rk_crypto_enqueue; - crypto_info->busy = false; + err = rk_crypto_pm_init(crypto_info); + if (err) + goto err_pm; + + spin_lock(&rocklist.lock); + first = list_first_entry_or_null(&rocklist.dev_list, + struct rk_crypto_info, list); + list_add_tail(&crypto_info->list, &rocklist.dev_list); + spin_unlock(&rocklist.lock); + + if (!first) { + err = rk_crypto_register(crypto_info); + if (err) { + dev_err(dev, "Fail to register crypto algorithms"); + goto err_register_alg; + } - err = rk_crypto_register(crypto_info); - if (err) { - dev_err(dev, "err in register alg"); - goto err_register_alg; + register_debugfs(crypto_info); } - dev_info(dev, "Crypto Accelerator successfully registered\n"); return 0; err_register_alg: - tasklet_kill(&crypto_info->queue_task); - tasklet_kill(&crypto_info->done_task); + rk_crypto_pm_exit(crypto_info); +err_pm: + crypto_engine_exit(crypto_info->engine); err_crypto: + dev_err(dev, "Crypto Accelerator not successfully registered\n"); return err; } static int rk_crypto_remove(struct platform_device *pdev) { struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev); - - rk_crypto_unregister(); - tasklet_kill(&crypto_tmp->done_task); - tasklet_kill(&crypto_tmp->queue_task); + struct rk_crypto_info *first; + + spin_lock_bh(&rocklist.lock); + list_del(&crypto_tmp->list); + first = list_first_entry_or_null(&rocklist.dev_list, + struct rk_crypto_info, list); + spin_unlock_bh(&rocklist.lock); + + if (!first) { +#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG + debugfs_remove_recursive(rocklist.dbgfs_dir); +#endif + rk_crypto_unregister(); + } + rk_crypto_pm_exit(crypto_tmp); + crypto_engine_exit(crypto_tmp->engine); return 0; } @@ -433,6 +423,7 @@ static struct platform_driver crypto_driver = { .remove = rk_crypto_remove, .driver = { .name = "rk3288-crypto", + .pm = &rk_crypto_pm_ops, .of_match_table = crypto_of_id_table, }, }; diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h index 97278c2574ff93a93c2c7f01467a6a1895f14264..b2695258cade969b8898adb0f8cdd1f335263935 100644 --- a/drivers/crypto/rockchip/rk3288_crypto.h +++ b/drivers/crypto/rockchip/rk3288_crypto.h @@ -5,9 +5,13 @@ #include #include #include +#include #include +#include #include +#include #include +#include #include #include @@ -184,85 +188,91 @@ #define CRYPTO_WRITE(dev, offset, val) \ writel_relaxed((val), ((dev)->reg + (offset))) +#define RK_MAX_CLKS 4 + +/* + * struct rockchip_ip - struct for managing a list of RK crypto instance + * @dev_list: Used for doing a list of rk_crypto_info + * @lock: Control access to dev_list + * @dbgfs_dir: Debugfs dentry for statistic directory + * @dbgfs_stats: Debugfs dentry for statistic counters + */ +struct rockchip_ip { + struct list_head dev_list; + spinlock_t lock; /* Control access to dev_list */ + struct dentry *dbgfs_dir; + struct dentry *dbgfs_stats; +}; + +struct rk_clks { + const char *name; + unsigned long max; +}; + +struct rk_variant { + int num_clks; + struct rk_clks rkclks[RK_MAX_CLKS]; +}; + struct rk_crypto_info { + struct list_head list; struct device *dev; - struct clk *aclk; - struct clk *hclk; - struct clk *sclk; - struct clk *dmaclk; + struct clk_bulk_data *clks; + int num_clks; struct reset_control *rst; void __iomem *reg; int irq; - struct crypto_queue queue; - struct tasklet_struct queue_task; - struct tasklet_struct done_task; - struct crypto_async_request *async_req; - int err; - /* device lock */ - spinlock_t lock; - - /* the public variable */ - struct scatterlist *sg_src; - struct scatterlist *sg_dst; - struct scatterlist sg_tmp; - struct scatterlist *first; - unsigned int left_bytes; - void *addr_vir; - int aligned; - int align_size; - size_t src_nents; - size_t dst_nents; - unsigned int total; - unsigned int count; - dma_addr_t addr_in; - dma_addr_t addr_out; - bool busy; - int (*start)(struct rk_crypto_info *dev); - int (*update)(struct rk_crypto_info *dev); - void (*complete)(struct crypto_async_request *base, int err); - int (*enable_clk)(struct rk_crypto_info *dev); - void (*disable_clk)(struct rk_crypto_info *dev); - int (*load_data)(struct rk_crypto_info *dev, - struct scatterlist *sg_src, - struct scatterlist *sg_dst); - void (*unload_data)(struct rk_crypto_info *dev); - int (*enqueue)(struct rk_crypto_info *dev, - struct crypto_async_request *async_req); + const struct rk_variant *variant; + unsigned long nreq; + struct crypto_engine *engine; + struct completion complete; + int status; }; /* the private variable of hash */ struct rk_ahash_ctx { - struct rk_crypto_info *dev; + struct crypto_engine_ctx enginectx; /* for fallback */ struct crypto_ahash *fallback_tfm; }; -/* the privete variable of hash for fallback */ +/* the private variable of hash for fallback */ struct rk_ahash_rctx { + struct rk_crypto_info *dev; struct ahash_request fallback_req; u32 mode; + int nrsg; }; /* the private variable of cipher */ struct rk_cipher_ctx { - struct rk_crypto_info *dev; + struct crypto_engine_ctx enginectx; unsigned int keylen; - u32 mode; + u8 key[AES_MAX_KEY_SIZE]; u8 iv[AES_BLOCK_SIZE]; + struct crypto_skcipher *fallback_tfm; }; -enum alg_type { - ALG_TYPE_HASH, - ALG_TYPE_CIPHER, +struct rk_cipher_rctx { + struct rk_crypto_info *dev; + u8 backup_iv[AES_BLOCK_SIZE]; + u32 mode; + struct skcipher_request fallback_req; // keep at the end }; struct rk_crypto_tmp { - struct rk_crypto_info *dev; + u32 type; + struct rk_crypto_info *dev; union { struct skcipher_alg skcipher; struct ahash_alg hash; } alg; - enum alg_type type; + unsigned long stat_req; + unsigned long stat_fb; + unsigned long stat_fb_len; + unsigned long stat_fb_sglen; + unsigned long stat_fb_align; + unsigned long stat_fb_sgdiff; }; extern struct rk_crypto_tmp rk_ecb_aes_alg; @@ -276,4 +286,5 @@ extern struct rk_crypto_tmp rk_ahash_sha1; extern struct rk_crypto_tmp rk_ahash_sha256; extern struct rk_crypto_tmp rk_ahash_md5; +struct rk_crypto_info *get_rk_crypto(void); #endif diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c index ed03058497bc2863e1c6333fb42f025efd85291b..a78ff3dcd0b1f366081948c34705d8b72f8a99ff 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c +++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c @@ -9,6 +9,8 @@ * Some ideas are from marvell/cesa.c and s5p-sss.c driver. */ #include +#include +#include #include "rk3288_crypto.h" /* @@ -16,6 +18,44 @@ * so we put the fixed hash out when met zero message. */ +static bool rk_ahash_need_fallback(struct ahash_request *req) +{ + struct scatterlist *sg; + + sg = req->src; + while (sg) { + if (!IS_ALIGNED(sg->offset, sizeof(u32))) { + return true; + } + if (sg->length % 4) { + return true; + } + sg = sg_next(sg); + } + return false; +} + +static int rk_ahash_digest_fb(struct ahash_request *areq) +{ + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); + + algt->stat_fb++; + + ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm); + rctx->fallback_req.base.flags = areq->base.flags & + CRYPTO_TFM_REQ_MAY_SLEEP; + + rctx->fallback_req.nbytes = areq->nbytes; + rctx->fallback_req.src = areq->src; + rctx->fallback_req.result = areq->result; + + return crypto_ahash_digest(&rctx->fallback_req); +} + static int zero_message_process(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); @@ -38,15 +78,9 @@ static int zero_message_process(struct ahash_request *req) return 0; } -static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err) +static void rk_ahash_reg_init(struct ahash_request *req, + struct rk_crypto_info *dev) { - if (base->complete) - base->complete(base, err); -} - -static void rk_ahash_reg_init(struct rk_crypto_info *dev) -{ - struct ahash_request *req = ahash_request_cast(dev->async_req); struct rk_ahash_rctx *rctx = ahash_request_ctx(req); int reg_status; @@ -74,7 +108,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev) RK_CRYPTO_BYTESWAP_BRFIFO | RK_CRYPTO_BYTESWAP_BTFIFO); - CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, dev->total); + CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes); } static int rk_ahash_init(struct ahash_request *req) @@ -164,51 +198,80 @@ static int rk_ahash_export(struct ahash_request *req, void *out) static int rk_ahash_digest(struct ahash_request *req) { - struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm); - struct rk_crypto_info *dev = tctx->dev; + struct rk_ahash_rctx *rctx = ahash_request_ctx(req); + struct rk_crypto_info *dev; + struct crypto_engine *engine; + + if (rk_ahash_need_fallback(req)) + return rk_ahash_digest_fb(req); if (!req->nbytes) return zero_message_process(req); - else - return dev->enqueue(dev, &req->base); + + dev = get_rk_crypto(); + + rctx->dev = dev; + engine = dev->engine; + + return crypto_transfer_hash_request_to_engine(engine, req); } -static void crypto_ahash_dma_start(struct rk_crypto_info *dev) +static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg) { - CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, dev->addr_in); - CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, (dev->count + 3) / 4); + CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg)); + CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4); CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START | (RK_CRYPTO_HASH_START << 16)); } -static int rk_ahash_set_data_start(struct rk_crypto_info *dev) +static int rk_hash_prepare(struct crypto_engine *engine, void *breq) { - int err; + struct ahash_request *areq = container_of(breq, struct ahash_request, base); + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct rk_crypto_info *rkc = rctx->dev; + int ret; - err = dev->load_data(dev, dev->sg_src, NULL); - if (!err) - crypto_ahash_dma_start(dev); - return err; + ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE); + if (ret <= 0) + return -EINVAL; + + rctx->nrsg = ret; + + return 0; } -static int rk_ahash_start(struct rk_crypto_info *dev) +static int rk_hash_unprepare(struct crypto_engine *engine, void *breq) { - struct ahash_request *req = ahash_request_cast(dev->async_req); - struct crypto_ahash *tfm; - struct rk_ahash_rctx *rctx; - - dev->total = req->nbytes; - dev->left_bytes = req->nbytes; - dev->aligned = 0; - dev->align_size = 4; - dev->sg_dst = NULL; - dev->sg_src = req->src; - dev->first = req->src; - dev->src_nents = sg_nents(req->src); - rctx = ahash_request_ctx(req); + struct ahash_request *areq = container_of(breq, struct ahash_request, base); + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct rk_crypto_info *rkc = rctx->dev; + + dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE); + return 0; +} + +static int rk_hash_run(struct crypto_engine *engine, void *breq) +{ + struct ahash_request *areq = container_of(breq, struct ahash_request, base); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + struct rk_ahash_rctx *rctx = ahash_request_ctx(areq); + struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); + struct scatterlist *sg = areq->src; + struct rk_crypto_info *rkc = rctx->dev; + int err = 0; + int i; + u32 v; + + err = pm_runtime_resume_and_get(rkc->dev); + if (err) + return err; + rctx->mode = 0; - tfm = crypto_ahash_reqtfm(req); + algt->stat_req++; + rkc->nreq++; + switch (crypto_ahash_digestsize(tfm)) { case SHA1_DIGEST_SIZE: rctx->mode = RK_CRYPTO_HASH_SHA1; @@ -220,100 +283,88 @@ static int rk_ahash_start(struct rk_crypto_info *dev) rctx->mode = RK_CRYPTO_HASH_MD5; break; default: - return -EINVAL; + err = -EINVAL; + goto theend; } - rk_ahash_reg_init(dev); - return rk_ahash_set_data_start(dev); -} - -static int rk_ahash_crypto_rx(struct rk_crypto_info *dev) -{ - int err = 0; - struct ahash_request *req = ahash_request_cast(dev->async_req); - struct crypto_ahash *tfm; - - dev->unload_data(dev); - if (dev->left_bytes) { - if (dev->aligned) { - if (sg_is_last(dev->sg_src)) { - dev_warn(dev->dev, "[%s:%d], Lack of data\n", - __func__, __LINE__); - err = -ENOMEM; - goto out_rx; - } - dev->sg_src = sg_next(dev->sg_src); + rk_ahash_reg_init(areq, rkc); + + while (sg) { + reinit_completion(&rkc->complete); + rkc->status = 0; + crypto_ahash_dma_start(rkc, sg); + wait_for_completion_interruptible_timeout(&rkc->complete, + msecs_to_jiffies(2000)); + if (!rkc->status) { + dev_err(rkc->dev, "DMA timeout\n"); + err = -EFAULT; + goto theend; } - err = rk_ahash_set_data_start(dev); - } else { - /* - * it will take some time to process date after last dma - * transmission. - * - * waiting time is relative with the last date len, - * so cannot set a fixed time here. - * 10us makes system not call here frequently wasting - * efficiency, and make it response quickly when dma - * complete. - */ - while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS)) - udelay(10); - - tfm = crypto_ahash_reqtfm(req); - memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0, - crypto_ahash_digestsize(tfm)); - dev->complete(dev->async_req, 0); - tasklet_schedule(&dev->queue_task); + sg = sg_next(sg); } -out_rx: - return err; + /* + * it will take some time to process date after last dma + * transmission. + * + * waiting time is relative with the last date len, + * so cannot set a fixed time here. + * 10us makes system not call here frequently wasting + * efficiency, and make it response quickly when dma + * complete. + */ + readl_poll_timeout(rkc->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000); + + for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) { + v = readl(rkc->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4); + put_unaligned_le32(v, areq->result + i * 4); + } + +theend: + pm_runtime_put_autosuspend(rkc->dev); + + local_bh_disable(); + crypto_finalize_hash_request(engine, breq, err); + local_bh_enable(); + + return 0; } static int rk_cra_hash_init(struct crypto_tfm *tfm) { struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); - struct rk_crypto_tmp *algt; - struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); - const char *alg_name = crypto_tfm_alg_name(tfm); - - algt = container_of(alg, struct rk_crypto_tmp, alg.hash); - - tctx->dev = algt->dev; - tctx->dev->addr_vir = (void *)__get_free_page(GFP_KERNEL); - if (!tctx->dev->addr_vir) { - dev_err(tctx->dev->dev, "failed to kmalloc for addr_vir\n"); - return -ENOMEM; - } - tctx->dev->start = rk_ahash_start; - tctx->dev->update = rk_ahash_crypto_rx; - tctx->dev->complete = rk_ahash_crypto_complete; + struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash); /* for fallback */ tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0, - CRYPTO_ALG_NEED_FALLBACK); + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(tctx->fallback_tfm)) { - dev_err(tctx->dev->dev, "Could not load fallback driver.\n"); + dev_err(algt->dev->dev, "Could not load fallback driver.\n"); return PTR_ERR(tctx->fallback_tfm); } + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct rk_ahash_rctx) + crypto_ahash_reqsize(tctx->fallback_tfm)); - return tctx->dev->enable_clk(tctx->dev); + tctx->enginectx.op.do_one_request = rk_hash_run; + tctx->enginectx.op.prepare_request = rk_hash_prepare; + tctx->enginectx.op.unprepare_request = rk_hash_unprepare; + + return 0; } static void rk_cra_hash_exit(struct crypto_tfm *tfm) { struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm); - free_page((unsigned long)tctx->dev->addr_vir); - return tctx->dev->disable_clk(tctx->dev); + crypto_free_ahash(tctx->fallback_tfm); } struct rk_crypto_tmp rk_ahash_sha1 = { - .type = ALG_TYPE_HASH, + .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .init = rk_ahash_init, .update = rk_ahash_update, @@ -337,13 +388,13 @@ struct rk_crypto_tmp rk_ahash_sha1 = { .cra_init = rk_cra_hash_init, .cra_exit = rk_cra_hash_exit, .cra_module = THIS_MODULE, - } - } + } + } } }; struct rk_crypto_tmp rk_ahash_sha256 = { - .type = ALG_TYPE_HASH, + .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .init = rk_ahash_init, .update = rk_ahash_update, @@ -367,13 +418,13 @@ struct rk_crypto_tmp rk_ahash_sha256 = { .cra_init = rk_cra_hash_init, .cra_exit = rk_cra_hash_exit, .cra_module = THIS_MODULE, - } - } + } + } } }; struct rk_crypto_tmp rk_ahash_md5 = { - .type = ALG_TYPE_HASH, + .type = CRYPTO_ALG_TYPE_AHASH, .alg.hash = { .init = rk_ahash_init, .update = rk_ahash_update, @@ -397,7 +448,7 @@ struct rk_crypto_tmp rk_ahash_md5 = { .cra_init = rk_cra_hash_init, .cra_exit = rk_cra_hash_exit, .cra_module = THIS_MODULE, - } } + } } }; diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c index 5bbf0d2722e11cffef500ad42a27cb7d5e65980a..59069457582bf6d4a5194e13a8cc19caf9782034 100644 --- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c +++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c @@ -9,23 +9,94 @@ * Some ideas are from marvell-cesa.c and s5p-sss.c driver. */ #include +#include #include "rk3288_crypto.h" #define RK_CRYPTO_DEC BIT(0) -static void rk_crypto_complete(struct crypto_async_request *base, int err) +static int rk_cipher_need_fallback(struct skcipher_request *req) { - if (base->complete) - base->complete(base, err); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); + struct scatterlist *sgs, *sgd; + unsigned int stodo, dtodo, len; + unsigned int bs = crypto_skcipher_blocksize(tfm); + + if (!req->cryptlen) + return true; + + len = req->cryptlen; + sgs = req->src; + sgd = req->dst; + while (sgs && sgd) { + if (!IS_ALIGNED(sgs->offset, sizeof(u32))) { + algt->stat_fb_align++; + return true; + } + if (!IS_ALIGNED(sgd->offset, sizeof(u32))) { + algt->stat_fb_align++; + return true; + } + stodo = min(len, sgs->length); + if (stodo % bs) { + algt->stat_fb_len++; + return true; + } + dtodo = min(len, sgd->length); + if (dtodo % bs) { + algt->stat_fb_len++; + return true; + } + if (stodo != dtodo) { + algt->stat_fb_sgdiff++; + return true; + } + len -= stodo; + sgs = sg_next(sgs); + sgd = sg_next(sgd); + } + return false; } -static int rk_handle_req(struct rk_crypto_info *dev, - struct skcipher_request *req) +static int rk_cipher_fallback(struct skcipher_request *areq) { - if (!IS_ALIGNED(req->cryptlen, dev->align_size)) - return -EINVAL; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); + int err; + + algt->stat_fb++; + + skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm); + skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags, + areq->base.complete, areq->base.data); + skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst, + areq->cryptlen, areq->iv); + if (rctx->mode & RK_CRYPTO_DEC) + err = crypto_skcipher_decrypt(&rctx->fallback_req); else - return dev->enqueue(dev, &req->base); + err = crypto_skcipher_encrypt(&rctx->fallback_req); + return err; +} + +static int rk_cipher_handle_req(struct skcipher_request *req) +{ + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); + struct rk_crypto_info *rkc; + struct crypto_engine *engine; + + if (rk_cipher_need_fallback(req)) + return rk_cipher_fallback(req); + + rkc = get_rk_crypto(); + + engine = rkc->engine; + rctx->dev = rkc; + + return crypto_transfer_skcipher_request_to_engine(engine, req); } static int rk_aes_setkey(struct crypto_skcipher *cipher, @@ -38,8 +109,9 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher, keylen != AES_KEYSIZE_256) return -EINVAL; ctx->keylen = keylen; - memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen); - return 0; + memcpy(ctx->key, key, keylen); + + return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } static int rk_des_setkey(struct crypto_skcipher *cipher, @@ -53,8 +125,9 @@ static int rk_des_setkey(struct crypto_skcipher *cipher, return err; ctx->keylen = keylen; - memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); - return 0; + memcpy(ctx->key, key, keylen); + + return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } static int rk_tdes_setkey(struct crypto_skcipher *cipher, @@ -68,161 +141,136 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher, return err; ctx->keylen = keylen; - memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen); - return 0; + memcpy(ctx->key, key, keylen); + + return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen); } static int rk_aes_ecb_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_AES_ECB_MODE; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_AES_ECB_MODE; + return rk_cipher_handle_req(req); } static int rk_aes_ecb_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC; + return rk_cipher_handle_req(req); } static int rk_aes_cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_AES_CBC_MODE; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_AES_CBC_MODE; + return rk_cipher_handle_req(req); } static int rk_aes_cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC; + return rk_cipher_handle_req(req); } static int rk_des_ecb_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = 0; - return rk_handle_req(dev, req); + rctx->mode = 0; + return rk_cipher_handle_req(req); } static int rk_des_ecb_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_DEC; + return rk_cipher_handle_req(req); } static int rk_des_cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC; + return rk_cipher_handle_req(req); } static int rk_des_cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; + return rk_cipher_handle_req(req); } static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_TDES_SELECT; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_TDES_SELECT; + return rk_cipher_handle_req(req); } static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC; + return rk_cipher_handle_req(req); } static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; - return rk_handle_req(dev, req); + rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC; + return rk_cipher_handle_req(req); } static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req) { - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - struct rk_crypto_info *dev = ctx->dev; + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); - ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | + rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC; - return rk_handle_req(dev, req); + return rk_cipher_handle_req(req); } -static void rk_ablk_hw_init(struct rk_crypto_info *dev) +static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req) { - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req); struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(req); struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher); - u32 ivsize, block, conf_reg = 0; + u32 block, conf_reg = 0; block = crypto_tfm_alg_blocksize(tfm); - ivsize = crypto_skcipher_ivsize(cipher); if (block == DES_BLOCK_SIZE) { - ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | + rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE | RK_CRYPTO_TDES_BYTESWAP_KEY | RK_CRYPTO_TDES_BYTESWAP_IV; - CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode); - memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize); + CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode); + memcpy_toio(dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen); conf_reg = RK_CRYPTO_DESSEL; } else { - ctx->mode |= RK_CRYPTO_AES_FIFO_MODE | + rctx->mode |= RK_CRYPTO_AES_FIFO_MODE | RK_CRYPTO_AES_KEY_CHANGE | RK_CRYPTO_AES_BYTESWAP_KEY | RK_CRYPTO_AES_BYTESWAP_IV; if (ctx->keylen == AES_KEYSIZE_192) - ctx->mode |= RK_CRYPTO_AES_192BIT_key; + rctx->mode |= RK_CRYPTO_AES_192BIT_key; else if (ctx->keylen == AES_KEYSIZE_256) - ctx->mode |= RK_CRYPTO_AES_256BIT_key; - CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode); - memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize); + rctx->mode |= RK_CRYPTO_AES_256BIT_key; + CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode); + memcpy_toio(dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen); } conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO | RK_CRYPTO_BYTESWAP_BRFIFO; @@ -231,189 +279,196 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev) RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA); } -static void crypto_dma_start(struct rk_crypto_info *dev) +static void crypto_dma_start(struct rk_crypto_info *dev, + struct scatterlist *sgs, + struct scatterlist *sgd, unsigned int todo) { - CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in); - CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4); - CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out); + CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs)); + CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo); + CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd)); CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START | _SBF(RK_CRYPTO_BLOCK_START, 16)); } -static int rk_set_data_start(struct rk_crypto_info *dev) +static int rk_cipher_run(struct crypto_engine *engine, void *async_req) { - int err; - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 ivsize = crypto_skcipher_ivsize(tfm); - u8 *src_last_blk = page_address(sg_page(dev->sg_src)) + - dev->sg_src->offset + dev->sg_src->length - ivsize; - - /* Store the iv that need to be updated in chain mode. - * And update the IV buffer to contain the next IV for decryption mode. - */ - if (ctx->mode & RK_CRYPTO_DEC) { - memcpy(ctx->iv, src_last_blk, ivsize); - sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv, - ivsize, dev->total - ivsize); - } - - err = dev->load_data(dev, dev->sg_src, dev->sg_dst); - if (!err) - crypto_dma_start(dev); - return err; -} - -static int rk_ablk_start(struct rk_crypto_info *dev) -{ - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - unsigned long flags; + struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base); + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq); + struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq); + struct scatterlist *sgs, *sgd; int err = 0; + int ivsize = crypto_skcipher_ivsize(tfm); + int offset; + u8 iv[AES_BLOCK_SIZE]; + u8 biv[AES_BLOCK_SIZE]; + u8 *ivtouse = areq->iv; + unsigned int len = areq->cryptlen; + unsigned int todo; + struct skcipher_alg *alg = crypto_skcipher_alg(tfm); + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); + struct rk_crypto_info *rkc = rctx->dev; - dev->left_bytes = req->cryptlen; - dev->total = req->cryptlen; - dev->sg_src = req->src; - dev->first = req->src; - dev->src_nents = sg_nents(req->src); - dev->sg_dst = req->dst; - dev->dst_nents = sg_nents(req->dst); - dev->aligned = 1; - - spin_lock_irqsave(&dev->lock, flags); - rk_ablk_hw_init(dev); - err = rk_set_data_start(dev); - spin_unlock_irqrestore(&dev->lock, flags); - return err; -} + err = pm_runtime_resume_and_get(rkc->dev); + if (err) + return err; -static void rk_iv_copyback(struct rk_crypto_info *dev) -{ - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 ivsize = crypto_skcipher_ivsize(tfm); + algt->stat_req++; + rkc->nreq++; - /* Update the IV buffer to contain the next IV for encryption mode. */ - if (!(ctx->mode & RK_CRYPTO_DEC)) { - if (dev->aligned) { - memcpy(req->iv, sg_virt(dev->sg_dst) + - dev->sg_dst->length - ivsize, ivsize); - } else { - memcpy(req->iv, dev->addr_vir + - dev->count - ivsize, ivsize); + ivsize = crypto_skcipher_ivsize(tfm); + if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) { + if (rctx->mode & RK_CRYPTO_DEC) { + offset = areq->cryptlen - ivsize; + scatterwalk_map_and_copy(rctx->backup_iv, areq->src, + offset, ivsize, 0); } } -} -static void rk_update_iv(struct rk_crypto_info *dev) -{ - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 ivsize = crypto_skcipher_ivsize(tfm); - u8 *new_iv = NULL; + sgs = areq->src; + sgd = areq->dst; - if (ctx->mode & RK_CRYPTO_DEC) { - new_iv = ctx->iv; - } else { - new_iv = page_address(sg_page(dev->sg_dst)) + - dev->sg_dst->offset + dev->sg_dst->length - ivsize; + while (sgs && sgd && len) { + if (!sgs->length) { + sgs = sg_next(sgs); + sgd = sg_next(sgd); + continue; + } + if (rctx->mode & RK_CRYPTO_DEC) { + /* we backup last block of source to be used as IV at next step */ + offset = sgs->length - ivsize; + scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0); + } + if (sgs == sgd) { + err = dma_map_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); + if (err <= 0) { + err = -EINVAL; + goto theend_iv; + } + } else { + err = dma_map_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); + if (err <= 0) { + err = -EINVAL; + goto theend_iv; + } + err = dma_map_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); + if (err <= 0) { + err = -EINVAL; + goto theend_sgs; + } + } + err = 0; + rk_cipher_hw_init(rkc, areq); + if (ivsize) { + if (ivsize == DES_BLOCK_SIZE) + memcpy_toio(rkc->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize); + else + memcpy_toio(rkc->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize); + } + reinit_completion(&rkc->complete); + rkc->status = 0; + + todo = min(sg_dma_len(sgs), len); + len -= todo; + crypto_dma_start(rkc, sgs, sgd, todo / 4); + wait_for_completion_interruptible_timeout(&rkc->complete, + msecs_to_jiffies(2000)); + if (!rkc->status) { + dev_err(rkc->dev, "DMA timeout\n"); + err = -EFAULT; + goto theend; + } + if (sgs == sgd) { + dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); + } else { + dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); + dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); + } + if (rctx->mode & RK_CRYPTO_DEC) { + memcpy(iv, biv, ivsize); + ivtouse = iv; + } else { + offset = sgd->length - ivsize; + scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0); + ivtouse = iv; + } + sgs = sg_next(sgs); + sgd = sg_next(sgd); } - if (ivsize == DES_BLOCK_SIZE) - memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize); - else if (ivsize == AES_BLOCK_SIZE) - memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize); -} - -/* return: - * true some err was occurred - * fault no err, continue - */ -static int rk_ablk_rx(struct rk_crypto_info *dev) -{ - int err = 0; - struct skcipher_request *req = - skcipher_request_cast(dev->async_req); - - dev->unload_data(dev); - if (!dev->aligned) { - if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents, - dev->addr_vir, dev->count, - dev->total - dev->left_bytes - - dev->count)) { - err = -EINVAL; - goto out_rx; + if (areq->iv && ivsize > 0) { + offset = areq->cryptlen - ivsize; + if (rctx->mode & RK_CRYPTO_DEC) { + memcpy(areq->iv, rctx->backup_iv, ivsize); + memzero_explicit(rctx->backup_iv, ivsize); + } else { + scatterwalk_map_and_copy(areq->iv, areq->dst, offset, + ivsize, 0); } } - if (dev->left_bytes) { - rk_update_iv(dev); - if (dev->aligned) { - if (sg_is_last(dev->sg_src)) { - dev_err(dev->dev, "[%s:%d] Lack of data\n", - __func__, __LINE__); - err = -ENOMEM; - goto out_rx; - } - dev->sg_src = sg_next(dev->sg_src); - dev->sg_dst = sg_next(dev->sg_dst); - } - err = rk_set_data_start(dev); + +theend: + pm_runtime_put_autosuspend(rkc->dev); + + local_bh_disable(); + crypto_finalize_skcipher_request(engine, areq, err); + local_bh_enable(); + return 0; + +theend_sgs: + if (sgs == sgd) { + dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL); } else { - rk_iv_copyback(dev); - /* here show the calculation is over without any err */ - dev->complete(dev->async_req, 0); - tasklet_schedule(&dev->queue_task); + dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE); + dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE); } -out_rx: +theend_iv: return err; } -static int rk_ablk_init_tfm(struct crypto_skcipher *tfm) +static int rk_cipher_tfm_init(struct crypto_skcipher *tfm) { struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); + const char *name = crypto_tfm_alg_name(&tfm->base); struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - struct rk_crypto_tmp *algt; + struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); - algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher); + ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(ctx->fallback_tfm)) { + dev_err(algt->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n", + name, PTR_ERR(ctx->fallback_tfm)); + return PTR_ERR(ctx->fallback_tfm); + } + + tfm->reqsize = sizeof(struct rk_cipher_rctx) + + crypto_skcipher_reqsize(ctx->fallback_tfm); - ctx->dev = algt->dev; - ctx->dev->align_size = crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)) + 1; - ctx->dev->start = rk_ablk_start; - ctx->dev->update = rk_ablk_rx; - ctx->dev->complete = rk_crypto_complete; - ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL); + ctx->enginectx.op.do_one_request = rk_cipher_run; - return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM; + return 0; } -static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm) +static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm) { struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm); - free_page((unsigned long)ctx->dev->addr_vir); - ctx->dev->disable_clk(ctx->dev); + memzero_explicit(ctx->key, ctx->keylen); + crypto_free_skcipher(ctx->fallback_tfm); } struct rk_crypto_tmp rk_ecb_aes_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(aes)", .base.cra_driver_name = "ecb-aes-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x0f, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .setkey = rk_aes_setkey, @@ -423,19 +478,19 @@ struct rk_crypto_tmp rk_ecb_aes_alg = { }; struct rk_crypto_tmp rk_cbc_aes_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "cbc(aes)", .base.cra_driver_name = "cbc-aes-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = AES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x0f, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .ivsize = AES_BLOCK_SIZE, @@ -446,19 +501,19 @@ struct rk_crypto_tmp rk_cbc_aes_alg = { }; struct rk_crypto_tmp rk_ecb_des_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(des)", .base.cra_driver_name = "ecb-des-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, .setkey = rk_des_setkey, @@ -468,19 +523,19 @@ struct rk_crypto_tmp rk_ecb_des_alg = { }; struct rk_crypto_tmp rk_cbc_des_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "cbc(des)", .base.cra_driver_name = "cbc-des-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES_KEY_SIZE, .max_keysize = DES_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, @@ -491,19 +546,19 @@ struct rk_crypto_tmp rk_cbc_des_alg = { }; struct rk_crypto_tmp rk_ecb_des3_ede_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "ecb(des3_ede)", .base.cra_driver_name = "ecb-des3-ede-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, .setkey = rk_tdes_setkey, @@ -513,19 +568,19 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = { }; struct rk_crypto_tmp rk_cbc_des3_ede_alg = { - .type = ALG_TYPE_CIPHER, + .type = CRYPTO_ALG_TYPE_SKCIPHER, .alg.skcipher = { .base.cra_name = "cbc(des3_ede)", .base.cra_driver_name = "cbc-des3-ede-rk", .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, + .base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, .base.cra_blocksize = DES_BLOCK_SIZE, .base.cra_ctxsize = sizeof(struct rk_cipher_ctx), .base.cra_alignmask = 0x07, .base.cra_module = THIS_MODULE, - .init = rk_ablk_init_tfm, - .exit = rk_ablk_exit_tfm, + .init = rk_cipher_tfm_init, + .exit = rk_cipher_tfm_exit, .min_keysize = DES3_EDE_KEY_SIZE, .max_keysize = DES3_EDE_KEY_SIZE, .ivsize = DES_BLOCK_SIZE, diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig index 4a4c3284ae1f3b5de3187a2082a858c79582e187..4fc581e9e59569a32230d0e84ba543944fb02507 100644 --- a/drivers/crypto/stm32/Kconfig +++ b/drivers/crypto/stm32/Kconfig @@ -10,7 +10,7 @@ config CRYPTO_DEV_STM32_CRC config CRYPTO_DEV_STM32_HASH tristate "Support for STM32 hash accelerators" - depends on ARCH_STM32 + depends on ARCH_STM32 || ARCH_U8500 depends on HAS_DMA select CRYPTO_HASH select CRYPTO_MD5 @@ -23,7 +23,7 @@ config CRYPTO_DEV_STM32_HASH config CRYPTO_DEV_STM32_CRYP tristate "Support for STM32 cryp accelerators" - depends on ARCH_STM32 + depends on ARCH_STM32 || ARCH_U8500 select CRYPTO_HASH select CRYPTO_ENGINE select CRYPTO_LIB_DES diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 59ef541123ae62d9bf29ffbe20462a079fafc312..4208338e72b6051da268abdbace9790c2186b346 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -2,6 +2,7 @@ /* * Copyright (C) STMicroelectronics SA 2017 * Author: Fabien Dessenne + * Ux500 support taken from snippets in the old Ux500 cryp driver */ #include @@ -62,6 +63,29 @@ #define CRYP_CSGCMCCM0R 0x00000050 #define CRYP_CSGCM0R 0x00000070 +#define UX500_CRYP_CR 0x00000000 +#define UX500_CRYP_SR 0x00000004 +#define UX500_CRYP_DIN 0x00000008 +#define UX500_CRYP_DINSIZE 0x0000000C +#define UX500_CRYP_DOUT 0x00000010 +#define UX500_CRYP_DOUSIZE 0x00000014 +#define UX500_CRYP_DMACR 0x00000018 +#define UX500_CRYP_IMSC 0x0000001C +#define UX500_CRYP_RIS 0x00000020 +#define UX500_CRYP_MIS 0x00000024 +#define UX500_CRYP_K1L 0x00000028 +#define UX500_CRYP_K1R 0x0000002C +#define UX500_CRYP_K2L 0x00000030 +#define UX500_CRYP_K2R 0x00000034 +#define UX500_CRYP_K3L 0x00000038 +#define UX500_CRYP_K3R 0x0000003C +#define UX500_CRYP_K4L 0x00000040 +#define UX500_CRYP_K4R 0x00000044 +#define UX500_CRYP_IV0L 0x00000048 +#define UX500_CRYP_IV0R 0x0000004C +#define UX500_CRYP_IV1L 0x00000050 +#define UX500_CRYP_IV1R 0x00000054 + /* Registers values */ #define CR_DEC_NOT_ENC 0x00000004 #define CR_TDES_ECB 0x00000000 @@ -71,7 +95,8 @@ #define CR_AES_ECB 0x00000020 #define CR_AES_CBC 0x00000028 #define CR_AES_CTR 0x00000030 -#define CR_AES_KP 0x00000038 +#define CR_AES_KP 0x00000038 /* Not on Ux500 */ +#define CR_AES_XTS 0x00000038 /* Only on Ux500 */ #define CR_AES_GCM 0x00080000 #define CR_AES_CCM 0x00080008 #define CR_AES_UNKNOWN 0xFFFFFFFF @@ -83,6 +108,8 @@ #define CR_KEY128 0x00000000 #define CR_KEY192 0x00000100 #define CR_KEY256 0x00000200 +#define CR_KEYRDEN 0x00000400 /* Only on Ux500 */ +#define CR_KSE 0x00000800 /* Only on Ux500 */ #define CR_FFLUSH 0x00004000 #define CR_CRYPEN 0x00008000 #define CR_PH_INIT 0x00000000 @@ -107,8 +134,25 @@ #define CRYP_AUTOSUSPEND_DELAY 50 struct stm32_cryp_caps { - bool swap_final; - bool padding_wa; + bool aeads_support; + bool linear_aes_key; + bool kp_mode; + bool iv_protection; + bool swap_final; + bool padding_wa; + u32 cr; + u32 sr; + u32 din; + u32 dout; + u32 imsc; + u32 mis; + u32 k1l; + u32 k1r; + u32 k3r; + u32 iv0l; + u32 iv0r; + u32 iv1l; + u32 iv1r; }; struct stm32_cryp_ctx { @@ -228,20 +272,21 @@ static inline int stm32_cryp_wait_busy(struct stm32_cryp *cryp) { u32 status; - return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status, + return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->sr, status, !(status & SR_BUSY), 10, 100000); } static inline void stm32_cryp_enable(struct stm32_cryp *cryp) { - writel_relaxed(readl_relaxed(cryp->regs + CRYP_CR) | CR_CRYPEN, cryp->regs + CRYP_CR); + writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) | CR_CRYPEN, + cryp->regs + cryp->caps->cr); } static inline int stm32_cryp_wait_enable(struct stm32_cryp *cryp) { u32 status; - return readl_relaxed_poll_timeout(cryp->regs + CRYP_CR, status, + return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->cr, status, !(status & CR_CRYPEN), 10, 100000); } @@ -249,10 +294,22 @@ static inline int stm32_cryp_wait_output(struct stm32_cryp *cryp) { u32 status; - return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status, + return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->sr, status, status & SR_OFNE, 10, 100000); } +static inline void stm32_cryp_key_read_enable(struct stm32_cryp *cryp) +{ + writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) | CR_KEYRDEN, + cryp->regs + cryp->caps->cr); +} + +static inline void stm32_cryp_key_read_disable(struct stm32_cryp *cryp) +{ + writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) & ~CR_KEYRDEN, + cryp->regs + cryp->caps->cr); +} + static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp); static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err); @@ -281,12 +338,12 @@ static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, __be32 *iv) if (!iv) return; - stm32_cryp_write(cryp, CRYP_IV0LR, be32_to_cpu(*iv++)); - stm32_cryp_write(cryp, CRYP_IV0RR, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv0l, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv0r, be32_to_cpu(*iv++)); if (is_aes(cryp)) { - stm32_cryp_write(cryp, CRYP_IV1LR, be32_to_cpu(*iv++)); - stm32_cryp_write(cryp, CRYP_IV1RR, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv1l, be32_to_cpu(*iv++)); + stm32_cryp_write(cryp, cryp->caps->iv1r, be32_to_cpu(*iv++)); } } @@ -298,12 +355,102 @@ static void stm32_cryp_get_iv(struct stm32_cryp *cryp) if (!tmp) return; - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR)); - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR)); + if (cryp->caps->iv_protection) + stm32_cryp_key_read_enable(cryp); + + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0l)); + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0r)); if (is_aes(cryp)) { - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR)); - *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR)); + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1l)); + *tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1r)); + } + + if (cryp->caps->iv_protection) + stm32_cryp_key_read_disable(cryp); +} + +/** + * ux500_swap_bits_in_byte() - mirror the bits in a byte + * @b: the byte to be mirrored + * + * The bits are swapped the following way: + * Byte b include bits 0-7, nibble 1 (n1) include bits 0-3 and + * nibble 2 (n2) bits 4-7. + * + * Nibble 1 (n1): + * (The "old" (moved) bit is replaced with a zero) + * 1. Move bit 6 and 7, 4 positions to the left. + * 2. Move bit 3 and 5, 2 positions to the left. + * 3. Move bit 1-4, 1 position to the left. + * + * Nibble 2 (n2): + * 1. Move bit 0 and 1, 4 positions to the right. + * 2. Move bit 2 and 4, 2 positions to the right. + * 3. Move bit 3-6, 1 position to the right. + * + * Combine the two nibbles to a complete and swapped byte. + */ +static inline u8 ux500_swap_bits_in_byte(u8 b) +{ +#define R_SHIFT_4_MASK 0xc0 /* Bits 6 and 7, right shift 4 */ +#define R_SHIFT_2_MASK 0x28 /* (After right shift 4) Bits 3 and 5, + right shift 2 */ +#define R_SHIFT_1_MASK 0x1e /* (After right shift 2) Bits 1-4, + right shift 1 */ +#define L_SHIFT_4_MASK 0x03 /* Bits 0 and 1, left shift 4 */ +#define L_SHIFT_2_MASK 0x14 /* (After left shift 4) Bits 2 and 4, + left shift 2 */ +#define L_SHIFT_1_MASK 0x78 /* (After left shift 1) Bits 3-6, + left shift 1 */ + + u8 n1; + u8 n2; + + /* Swap most significant nibble */ + /* Right shift 4, bits 6 and 7 */ + n1 = ((b & R_SHIFT_4_MASK) >> 4) | (b & ~(R_SHIFT_4_MASK >> 4)); + /* Right shift 2, bits 3 and 5 */ + n1 = ((n1 & R_SHIFT_2_MASK) >> 2) | (n1 & ~(R_SHIFT_2_MASK >> 2)); + /* Right shift 1, bits 1-4 */ + n1 = (n1 & R_SHIFT_1_MASK) >> 1; + + /* Swap least significant nibble */ + /* Left shift 4, bits 0 and 1 */ + n2 = ((b & L_SHIFT_4_MASK) << 4) | (b & ~(L_SHIFT_4_MASK << 4)); + /* Left shift 2, bits 2 and 4 */ + n2 = ((n2 & L_SHIFT_2_MASK) << 2) | (n2 & ~(L_SHIFT_2_MASK << 2)); + /* Left shift 1, bits 3-6 */ + n2 = (n2 & L_SHIFT_1_MASK) << 1; + + return n1 | n2; +} + +/** + * ux500_swizzle_key() - Shuffle around words and bits in the AES key + * @in: key to swizzle + * @out: swizzled key + * @len: length of key, in bytes + * + * This "key swizzling procedure" is described in the examples in the + * DB8500 design specification. There is no real description of why + * the bits have been arranged like this in the hardware. + */ +static inline void ux500_swizzle_key(const u8 *in, u8 *out, u32 len) +{ + int i = 0; + int bpw = sizeof(u32); + int j; + int index = 0; + + j = len - bpw; + while (j >= 0) { + for (i = 0; i < bpw; i++) { + index = len - j - bpw + i; + out[j + i] = + ux500_swap_bits_in_byte(in[index]); + } + j -= bpw; } } @@ -313,14 +460,33 @@ static void stm32_cryp_hw_write_key(struct stm32_cryp *c) int r_id; if (is_des(c)) { - stm32_cryp_write(c, CRYP_K1LR, be32_to_cpu(c->ctx->key[0])); - stm32_cryp_write(c, CRYP_K1RR, be32_to_cpu(c->ctx->key[1])); - } else { - r_id = CRYP_K3RR; - for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4) - stm32_cryp_write(c, r_id, - be32_to_cpu(c->ctx->key[i - 1])); + stm32_cryp_write(c, c->caps->k1l, be32_to_cpu(c->ctx->key[0])); + stm32_cryp_write(c, c->caps->k1r, be32_to_cpu(c->ctx->key[1])); + return; } + + /* + * On the Ux500 the AES key is considered as a single bit sequence + * of 128, 192 or 256 bits length. It is written linearly into the + * registers from K1L and down, and need to be processed to become + * a proper big-endian bit sequence. + */ + if (is_aes(c) && c->caps->linear_aes_key) { + u32 tmpkey[8]; + + ux500_swizzle_key((u8 *)c->ctx->key, + (u8 *)tmpkey, c->ctx->keylen); + + r_id = c->caps->k1l; + for (i = 0; i < c->ctx->keylen / sizeof(u32); i++, r_id += 4) + stm32_cryp_write(c, r_id, tmpkey[i]); + + return; + } + + r_id = c->caps->k3r; + for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4) + stm32_cryp_write(c, r_id, be32_to_cpu(c->ctx->key[i - 1])); } static u32 stm32_cryp_get_hw_mode(struct stm32_cryp *cryp) @@ -373,7 +539,7 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg) cryp->gcm_ctr = GCM_CTR_INIT; stm32_cryp_hw_write_iv(cryp, iv); - stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN); + stm32_cryp_write(cryp, cryp->caps->cr, cfg | CR_PH_INIT | CR_CRYPEN); /* Wait for end of processing */ ret = stm32_cryp_wait_enable(cryp); @@ -385,10 +551,10 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg) /* Prepare next phase */ if (cryp->areq->assoclen) { cfg |= CR_PH_HEADER; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } else if (stm32_cryp_get_input_text_len(cryp)) { cfg |= CR_PH_PAYLOAD; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } return 0; @@ -405,20 +571,20 @@ static void stm32_crypt_gcmccm_end_header(struct stm32_cryp *cryp) err = stm32_cryp_wait_busy(cryp); if (err) { dev_err(cryp->dev, "Timeout (gcm/ccm header)\n"); - stm32_cryp_write(cryp, CRYP_IMSCR, 0); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); stm32_cryp_finish_req(cryp, err); return; } if (stm32_cryp_get_input_text_len(cryp)) { /* Phase 3 : payload */ - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); cfg &= ~CR_PH_MASK; cfg |= CR_PH_PAYLOAD | CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } else { /* * Phase 4 : tag. @@ -458,7 +624,7 @@ static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp) scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0); for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); cryp->header_in -= written; @@ -494,7 +660,7 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) b0[AES_BLOCK_SIZE - 1] = textlen & 0xFF; /* Enable HW */ - stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN); + stm32_cryp_write(cryp, cryp->caps->cr, cfg | CR_PH_INIT | CR_CRYPEN); /* Write B0 */ d = (u32 *)b0; @@ -505,7 +671,7 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) if (!cryp->caps->padding_wa) xd = be32_to_cpu(bd[i]); - stm32_cryp_write(cryp, CRYP_DIN, xd); + stm32_cryp_write(cryp, cryp->caps->din, xd); } /* Wait for end of processing */ @@ -518,13 +684,13 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg) /* Prepare next phase */ if (cryp->areq->assoclen) { cfg |= CR_PH_HEADER | CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* Write first (special) block (may move to next phase [payload]) */ stm32_cryp_write_ccm_first_header(cryp); } else if (stm32_cryp_get_input_text_len(cryp)) { cfg |= CR_PH_PAYLOAD; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } return 0; @@ -538,7 +704,7 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) pm_runtime_get_sync(cryp->dev); /* Disable interrupt */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); /* Set configuration */ cfg = CR_DATA8 | CR_FFLUSH; @@ -566,7 +732,12 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) if (is_decrypt(cryp) && ((hw_mode == CR_AES_ECB) || (hw_mode == CR_AES_CBC))) { /* Configure in key preparation mode */ - stm32_cryp_write(cryp, CRYP_CR, cfg | CR_AES_KP); + if (cryp->caps->kp_mode) + stm32_cryp_write(cryp, cryp->caps->cr, + cfg | CR_AES_KP); + else + stm32_cryp_write(cryp, + cryp->caps->cr, cfg | CR_AES_ECB | CR_KSE); /* Set key only after full configuration done */ stm32_cryp_hw_write_key(cryp); @@ -583,14 +754,14 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp) cfg |= hw_mode | CR_DEC_NOT_ENC; /* Apply updated config (Decrypt + algo) and flush */ - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } else { cfg |= hw_mode; if (is_decrypt(cryp)) cfg |= CR_DEC_NOT_ENC; /* Apply config and flush */ - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* Set key only after configuration done */ stm32_cryp_hw_write_key(cryp); @@ -649,7 +820,7 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err) static int stm32_cryp_cpu_start(struct stm32_cryp *cryp) { /* Enable interrupt and let the IRQ handler do everything */ - stm32_cryp_write(cryp, CRYP_IMSCR, IMSCR_IN | IMSCR_OUT); + stm32_cryp_write(cryp, cryp->caps->imsc, IMSCR_IN | IMSCR_OUT); return 0; } @@ -1137,14 +1308,14 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) int ret = 0; /* Update Config */ - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_PH_MASK; cfg |= CR_PH_FINAL; cfg &= ~CR_DEC_NOT_ENC; cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); if (is_gcm(cryp)) { /* GCM: write aad and payload size (in bits) */ @@ -1152,8 +1323,8 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) if (cryp->caps->swap_final) size_bit = (__force u32)cpu_to_be32(size_bit); - stm32_cryp_write(cryp, CRYP_DIN, 0); - stm32_cryp_write(cryp, CRYP_DIN, size_bit); + stm32_cryp_write(cryp, cryp->caps->din, 0); + stm32_cryp_write(cryp, cryp->caps->din, size_bit); size_bit = is_encrypt(cryp) ? cryp->areq->cryptlen : cryp->areq->cryptlen - cryp->authsize; @@ -1161,8 +1332,8 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) if (cryp->caps->swap_final) size_bit = (__force u32)cpu_to_be32(size_bit); - stm32_cryp_write(cryp, CRYP_DIN, 0); - stm32_cryp_write(cryp, CRYP_DIN, size_bit); + stm32_cryp_write(cryp, cryp->caps->din, 0); + stm32_cryp_write(cryp, cryp->caps->din, size_bit); } else { /* CCM: write CTR0 */ u32 iv32[AES_BLOCK_32]; @@ -1177,7 +1348,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) if (!cryp->caps->padding_wa) xiv = be32_to_cpu(biv[i]); - stm32_cryp_write(cryp, CRYP_DIN, xiv); + stm32_cryp_write(cryp, cryp->caps->din, xiv); } } @@ -1193,7 +1364,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) /* Get and write tag */ for (i = 0; i < AES_BLOCK_32; i++) - out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT); + out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1); } else { @@ -1203,7 +1374,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0); for (i = 0; i < AES_BLOCK_32; i++) - out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT); + out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout); if (crypto_memneq(in_tag, out_tag, cryp->authsize)) ret = -EBADMSG; @@ -1211,7 +1382,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp) /* Disable cryp */ cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); return ret; } @@ -1227,19 +1398,19 @@ static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp) */ crypto_inc((u8 *)cryp->last_ctr, sizeof(cryp->last_ctr)); - cr = stm32_cryp_read(cryp, CRYP_CR); - stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN); + cr = stm32_cryp_read(cryp, cryp->caps->cr); + stm32_cryp_write(cryp, cryp->caps->cr, cr & ~CR_CRYPEN); stm32_cryp_hw_write_iv(cryp, cryp->last_ctr); - stm32_cryp_write(cryp, CRYP_CR, cr); + stm32_cryp_write(cryp, cryp->caps->cr, cr); } /* The IV registers are BE */ - cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR)); - cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR)); - cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR)); - cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR)); + cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0l)); + cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0r)); + cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1l)); + cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1r)); } static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) @@ -1248,7 +1419,7 @@ static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp) u32 block[AES_BLOCK_32]; for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, CRYP_DOUT); + block[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1264,7 +1435,7 @@ static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp) scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_in), 0); for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in); } @@ -1278,22 +1449,22 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) /* 'Special workaround' procedure described in the datasheet */ /* a) disable ip */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); - cfg = stm32_cryp_read(cryp, CRYP_CR); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* b) Update IV1R */ - stm32_cryp_write(cryp, CRYP_IV1RR, cryp->gcm_ctr - 2); + stm32_cryp_write(cryp, cryp->caps->iv1r, cryp->gcm_ctr - 2); /* c) change mode to CTR */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_CTR; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* a) enable IP */ cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* b) pad and write the last block */ stm32_cryp_irq_write_block(cryp); @@ -1310,7 +1481,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) * block value */ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, CRYP_DOUT); + block[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1320,16 +1491,16 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) /* d) change mode back to AES GCM */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_GCM; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* e) change phase to Final */ cfg &= ~CR_PH_MASK; cfg |= CR_PH_FINAL; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* f) write padded data */ for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); /* g) Empty fifo out */ err = stm32_cryp_wait_output(cryp); @@ -1339,7 +1510,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp) } for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_read(cryp, CRYP_DOUT); + stm32_cryp_read(cryp, cryp->caps->dout); /* h) run the he normal Final phase */ stm32_cryp_finish_req(cryp, 0); @@ -1350,13 +1521,13 @@ static void stm32_cryp_irq_set_npblb(struct stm32_cryp *cryp) u32 cfg; /* disable ip, set NPBLB and reneable ip */ - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); cfg |= (cryp->hw_blocksize - cryp->payload_in) << CR_NBPBL_SHIFT; cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); } static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) @@ -1370,11 +1541,11 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) /* 'Special workaround' procedure described in the datasheet */ /* a) disable ip */ - stm32_cryp_write(cryp, CRYP_IMSCR, 0); + stm32_cryp_write(cryp, cryp->caps->imsc, 0); - cfg = stm32_cryp_read(cryp, CRYP_CR); + cfg = stm32_cryp_read(cryp, cryp->caps->cr); cfg &= ~CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* b) get IV1 from CRYP_CSGCMCCM7 */ iv1tmp = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + 7 * 4); @@ -1384,23 +1555,23 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) cstmp1[i] = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + i * 4); /* d) Write IV1R */ - stm32_cryp_write(cryp, CRYP_IV1RR, iv1tmp); + stm32_cryp_write(cryp, cryp->caps->iv1r, iv1tmp); /* e) change mode to CTR */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_CTR; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* a) enable IP */ cfg |= CR_CRYPEN; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* b) pad and write the last block */ stm32_cryp_irq_write_block(cryp); /* wait end of process */ err = stm32_cryp_wait_output(cryp); if (err) { - dev_err(cryp->dev, "Timeout (wite ccm padded data)\n"); + dev_err(cryp->dev, "Timeout (write ccm padded data)\n"); return stm32_cryp_finish_req(cryp, err); } @@ -1410,7 +1581,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) * block value */ for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++) - block[i] = stm32_cryp_read(cryp, CRYP_DOUT); + block[i] = stm32_cryp_read(cryp, cryp->caps->dout); scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize, cryp->payload_out), 1); @@ -1423,24 +1594,24 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp) /* e) change mode back to AES CCM */ cfg &= ~CR_ALGO_MASK; cfg |= CR_AES_CCM; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* f) change phase to header */ cfg &= ~CR_PH_MASK; cfg |= CR_PH_HEADER; - stm32_cryp_write(cryp, CRYP_CR, cfg); + stm32_cryp_write(cryp, cryp->caps->cr, cfg); /* g) XOR and write padded data */ for (i = 0; i < ARRAY_SIZE(block); i++) { block[i] ^= cstmp1[i]; block[i] ^= cstmp2[i]; - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); } /* h) wait for completion */ err = stm32_cryp_wait_busy(cryp); if (err) - dev_err(cryp->dev, "Timeout (wite ccm padded data)\n"); + dev_err(cryp->dev, "Timeout (write ccm padded data)\n"); /* i) run the he normal Final phase */ stm32_cryp_finish_req(cryp, err); @@ -1497,7 +1668,7 @@ static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp) scatterwalk_copychunks(block, &cryp->in_walk, written, 0); for (i = 0; i < AES_BLOCK_32; i++) - stm32_cryp_write(cryp, CRYP_DIN, block[i]); + stm32_cryp_write(cryp, cryp->caps->din, block[i]); cryp->header_in -= written; @@ -1508,7 +1679,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) { struct stm32_cryp *cryp = arg; u32 ph; - u32 it_mask = stm32_cryp_read(cryp, CRYP_IMSCR); + u32 it_mask = stm32_cryp_read(cryp, cryp->caps->imsc); if (cryp->irq_status & MISR_OUT) /* Output FIFO IRQ: read data */ @@ -1516,7 +1687,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) if (cryp->irq_status & MISR_IN) { if (is_gcm(cryp) || is_ccm(cryp)) { - ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK; + ph = stm32_cryp_read(cryp, cryp->caps->cr) & CR_PH_MASK; if (unlikely(ph == CR_PH_HEADER)) /* Write Header */ stm32_cryp_irq_write_gcmccm_header(cryp); @@ -1536,7 +1707,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg) it_mask &= ~IMSCR_IN; if (!cryp->payload_out) it_mask &= ~IMSCR_OUT; - stm32_cryp_write(cryp, CRYP_IMSCR, it_mask); + stm32_cryp_write(cryp, cryp->caps->imsc, it_mask); if (!cryp->payload_in && !cryp->header_in && !cryp->payload_out) stm32_cryp_finish_req(cryp, 0); @@ -1548,7 +1719,7 @@ static irqreturn_t stm32_cryp_irq(int irq, void *arg) { struct stm32_cryp *cryp = arg; - cryp->irq_status = stm32_cryp_read(cryp, CRYP_MISR); + cryp->irq_status = stm32_cryp_read(cryp, cryp->caps->mis); return IRQ_WAKE_THREAD; } @@ -1722,17 +1893,74 @@ static struct aead_alg aead_algs[] = { }, }; +static const struct stm32_cryp_caps ux500_data = { + .aeads_support = false, + .linear_aes_key = true, + .kp_mode = false, + .iv_protection = true, + .swap_final = true, + .padding_wa = true, + .cr = UX500_CRYP_CR, + .sr = UX500_CRYP_SR, + .din = UX500_CRYP_DIN, + .dout = UX500_CRYP_DOUT, + .imsc = UX500_CRYP_IMSC, + .mis = UX500_CRYP_MIS, + .k1l = UX500_CRYP_K1L, + .k1r = UX500_CRYP_K1R, + .k3r = UX500_CRYP_K3R, + .iv0l = UX500_CRYP_IV0L, + .iv0r = UX500_CRYP_IV0R, + .iv1l = UX500_CRYP_IV1L, + .iv1r = UX500_CRYP_IV1R, +}; + static const struct stm32_cryp_caps f7_data = { + .aeads_support = true, + .linear_aes_key = false, + .kp_mode = true, + .iv_protection = false, .swap_final = true, .padding_wa = true, + .cr = CRYP_CR, + .sr = CRYP_SR, + .din = CRYP_DIN, + .dout = CRYP_DOUT, + .imsc = CRYP_IMSCR, + .mis = CRYP_MISR, + .k1l = CRYP_K1LR, + .k1r = CRYP_K1RR, + .k3r = CRYP_K3RR, + .iv0l = CRYP_IV0LR, + .iv0r = CRYP_IV0RR, + .iv1l = CRYP_IV1LR, + .iv1r = CRYP_IV1RR, }; static const struct stm32_cryp_caps mp1_data = { + .aeads_support = true, + .linear_aes_key = false, + .kp_mode = true, + .iv_protection = false, .swap_final = false, .padding_wa = false, + .cr = CRYP_CR, + .sr = CRYP_SR, + .din = CRYP_DIN, + .dout = CRYP_DOUT, + .imsc = CRYP_IMSCR, + .mis = CRYP_MISR, + .k1l = CRYP_K1LR, + .k1r = CRYP_K1RR, + .k3r = CRYP_K3RR, + .iv0l = CRYP_IV0LR, + .iv0r = CRYP_IV0RR, + .iv1l = CRYP_IV1LR, + .iv1r = CRYP_IV1RR, }; static const struct of_device_id stm32_dt_ids[] = { + { .compatible = "stericsson,ux500-cryp", .data = &ux500_data}, { .compatible = "st,stm32f756-cryp", .data = &f7_data}, { .compatible = "st,stm32mp1-cryp", .data = &mp1_data}, {}, @@ -1829,9 +2057,11 @@ static int stm32_cryp_probe(struct platform_device *pdev) goto err_algs; } - ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs)); - if (ret) - goto err_aead_algs; + if (cryp->caps->aeads_support) { + ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs)); + if (ret) + goto err_aead_algs; + } dev_info(dev, "Initialized\n"); @@ -1869,7 +2099,8 @@ static int stm32_cryp_remove(struct platform_device *pdev) if (ret < 0) return ret; - crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs)); + if (cryp->caps->aeads_support) + crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs)); crypto_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs)); crypto_engine_exit(cryp->engine); diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index c9ad6c21309018603cdde5cfd903474f7a37c5d5..71db6450b6aa4038f5d81b21c2aac44550c6c19e 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1999,7 +1999,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) /* Buffer up to one whole block */ nents = sg_nents_for_len(areq->src, nbytes); if (nents < 0) { - dev_err(ctx->dev, "Invalid number of src SG.\n"); + dev_err(dev, "Invalid number of src SG.\n"); return nents; } sg_copy_to_buffer(areq->src, nents, @@ -2040,7 +2040,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) offset = nbytes_to_hash - req_ctx->nbuf; nents = sg_nents_for_len(areq->src, offset); if (nents < 0) { - dev_err(ctx->dev, "Invalid number of src SG.\n"); + dev_err(dev, "Invalid number of src SG.\n"); return nents; } sg_copy_to_buffer(areq->src, nents, @@ -2054,7 +2054,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes) if (to_hash_later) { nents = sg_nents_for_len(areq->src, nbytes); if (nents < 0) { - dev_err(ctx->dev, "Invalid number of src SG.\n"); + dev_err(dev, "Invalid number of src SG.\n"); return nents; } sg_pcopy_to_buffer(areq->src, nents, diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h index 32825119e8805f24174e92458074c7c32a84153f..1a93ee355929d705e6e4222b53b1d0345bdf46e2 100644 --- a/drivers/crypto/talitos.h +++ b/drivers/crypto/talitos.h @@ -65,8 +65,8 @@ struct talitos_edesc { dma_addr_t dma_link_tbl; struct talitos_desc desc; union { - struct talitos_ptr link_tbl[0]; - u8 buf[0]; + DECLARE_FLEX_ARRAY(struct talitos_ptr, link_tbl); + DECLARE_FLEX_ARRAY(u8, buf); }; }; diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig index f56d65c56ccf78579efb822a175f0ebc0276fd98..dcbd7404768f182bd78e485302664eeef0dd538a 100644 --- a/drivers/crypto/ux500/Kconfig +++ b/drivers/crypto/ux500/Kconfig @@ -4,16 +4,6 @@ # Author: Shujuan Chen (shujuan.chen@stericsson.com) # -config CRYPTO_DEV_UX500_CRYP - tristate "UX500 crypto driver for CRYP block" - depends on CRYPTO_DEV_UX500 - select CRYPTO_ALGAPI - select CRYPTO_SKCIPHER - select CRYPTO_LIB_DES - help - This selects the crypto driver for the UX500_CRYP hardware. It supports - AES-ECB, CBC and CTR with keys sizes of 128, 192 and 256 bit sizes. - config CRYPTO_DEV_UX500_HASH tristate "UX500 crypto driver for HASH block" depends on CRYPTO_DEV_UX500 diff --git a/drivers/crypto/ux500/Makefile b/drivers/crypto/ux500/Makefile index f014eb01710a609111c7766bead6e865b5556b45..f1aa4edf66f4f496952560e71341779c01d07d52 100644 --- a/drivers/crypto/ux500/Makefile +++ b/drivers/crypto/ux500/Makefile @@ -5,4 +5,3 @@ # obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += hash/ -obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += cryp/ diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile deleted file mode 100644 index 3e67531f484cd75f5f8771f821918c1dc2240155..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -#/* -# * Copyright (C) ST-Ericsson SA 2010 -# * Author: shujuan.chen@stericsson.com for ST-Ericsson. -# */ - -ccflags-$(CONFIG_CRYPTO_DEV_UX500_DEBUG) += -DDEBUG - -obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o -ux500_cryp-objs := cryp.o cryp_irq.o cryp_core.o diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c deleted file mode 100644 index 759d0d9786fd19019728a212d76f1d70040f8ae8..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/cryp.c +++ /dev/null @@ -1,394 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - */ - -#include -#include -#include - -#include "cryp_p.h" -#include "cryp.h" - -/* - * cryp_wait_until_done - wait until the device logic is not busy - */ -void cryp_wait_until_done(struct cryp_device_data *device_data) -{ - while (cryp_is_logic_busy(device_data)) - cpu_relax(); -} - -/** - * cryp_check - This routine checks Peripheral and PCell Id - * @device_data: Pointer to the device data struct for base address. - */ -int cryp_check(struct cryp_device_data *device_data) -{ - int peripheralid2 = 0; - - if (NULL == device_data) - return -EINVAL; - - peripheralid2 = readl_relaxed(&device_data->base->periphId2); - - if (peripheralid2 != CRYP_PERIPHERAL_ID2_DB8500) - return -EPERM; - - /* Check Peripheral and Pcell Id Register for CRYP */ - if ((CRYP_PERIPHERAL_ID0 == - readl_relaxed(&device_data->base->periphId0)) - && (CRYP_PERIPHERAL_ID1 == - readl_relaxed(&device_data->base->periphId1)) - && (CRYP_PERIPHERAL_ID3 == - readl_relaxed(&device_data->base->periphId3)) - && (CRYP_PCELL_ID0 == - readl_relaxed(&device_data->base->pcellId0)) - && (CRYP_PCELL_ID1 == - readl_relaxed(&device_data->base->pcellId1)) - && (CRYP_PCELL_ID2 == - readl_relaxed(&device_data->base->pcellId2)) - && (CRYP_PCELL_ID3 == - readl_relaxed(&device_data->base->pcellId3))) { - return 0; - } - - return -EPERM; -} - -/** - * cryp_activity - This routine enables/disable the cryptography function. - * @device_data: Pointer to the device data struct for base address. - * @cryp_crypen: Enable/Disable functionality - */ -void cryp_activity(struct cryp_device_data *device_data, - enum cryp_crypen cryp_crypen) -{ - CRYP_PUT_BITS(&device_data->base->cr, - cryp_crypen, - CRYP_CR_CRYPEN_POS, - CRYP_CR_CRYPEN_MASK); -} - -/** - * cryp_flush_inoutfifo - Resets both the input and the output FIFOs - * @device_data: Pointer to the device data struct for base address. - */ -void cryp_flush_inoutfifo(struct cryp_device_data *device_data) -{ - /* - * We always need to disable the hardware before trying to flush the - * FIFO. This is something that isn't written in the design - * specification, but we have been informed by the hardware designers - * that this must be done. - */ - cryp_activity(device_data, CRYP_CRYPEN_DISABLE); - cryp_wait_until_done(device_data); - - CRYP_SET_BITS(&device_data->base->cr, CRYP_CR_FFLUSH_MASK); - /* - * CRYP_SR_INFIFO_READY_MASK is the expected value on the status - * register when starting a new calculation, which means Input FIFO is - * not full and input FIFO is empty. - */ - while (readl_relaxed(&device_data->base->sr) != - CRYP_SR_INFIFO_READY_MASK) - cpu_relax(); -} - -/** - * cryp_set_configuration - This routine set the cr CRYP IP - * @device_data: Pointer to the device data struct for base address. - * @cryp_config: Pointer to the configuration parameter - * @control_register: The control register to be written later on. - */ -int cryp_set_configuration(struct cryp_device_data *device_data, - struct cryp_config *cryp_config, - u32 *control_register) -{ - u32 cr_for_kse; - - if (NULL == device_data || NULL == cryp_config) - return -EINVAL; - - *control_register |= (cryp_config->keysize << CRYP_CR_KEYSIZE_POS); - - /* Prepare key for decryption in AES_ECB and AES_CBC mode. */ - if ((CRYP_ALGORITHM_DECRYPT == cryp_config->algodir) && - ((CRYP_ALGO_AES_ECB == cryp_config->algomode) || - (CRYP_ALGO_AES_CBC == cryp_config->algomode))) { - cr_for_kse = *control_register; - /* - * This seems a bit odd, but it is indeed needed to set this to - * encrypt even though it is a decryption that we are doing. It - * also mentioned in the design spec that you need to do this. - * After the keyprepartion for decrypting is done you should set - * algodir back to decryption, which is done outside this if - * statement. - * - * According to design specification we should set mode ECB - * during key preparation even though we might be running CBC - * when enter this function. - * - * Writing to KSE_ENABLED will drop CRYPEN when key preparation - * is done. Therefore we need to set CRYPEN again outside this - * if statement when running decryption. - */ - cr_for_kse |= ((CRYP_ALGORITHM_ENCRYPT << CRYP_CR_ALGODIR_POS) | - (CRYP_ALGO_AES_ECB << CRYP_CR_ALGOMODE_POS) | - (CRYP_CRYPEN_ENABLE << CRYP_CR_CRYPEN_POS) | - (KSE_ENABLED << CRYP_CR_KSE_POS)); - - writel_relaxed(cr_for_kse, &device_data->base->cr); - cryp_wait_until_done(device_data); - } - - *control_register |= - ((cryp_config->algomode << CRYP_CR_ALGOMODE_POS) | - (cryp_config->algodir << CRYP_CR_ALGODIR_POS)); - - return 0; -} - -/** - * cryp_configure_protection - set the protection bits in the CRYP logic. - * @device_data: Pointer to the device data struct for base address. - * @p_protect_config: Pointer to the protection mode and - * secure mode configuration - */ -int cryp_configure_protection(struct cryp_device_data *device_data, - struct cryp_protection_config *p_protect_config) -{ - if (NULL == p_protect_config) - return -EINVAL; - - CRYP_WRITE_BIT(&device_data->base->cr, - (u32) p_protect_config->secure_access, - CRYP_CR_SECURE_MASK); - CRYP_PUT_BITS(&device_data->base->cr, - p_protect_config->privilege_access, - CRYP_CR_PRLG_POS, - CRYP_CR_PRLG_MASK); - - return 0; -} - -/** - * cryp_is_logic_busy - returns the busy status of the CRYP logic - * @device_data: Pointer to the device data struct for base address. - */ -int cryp_is_logic_busy(struct cryp_device_data *device_data) -{ - return CRYP_TEST_BITS(&device_data->base->sr, - CRYP_SR_BUSY_MASK); -} - -/** - * cryp_configure_for_dma - configures the CRYP IP for DMA operation - * @device_data: Pointer to the device data struct for base address. - * @dma_req: Specifies the DMA request type value. - */ -void cryp_configure_for_dma(struct cryp_device_data *device_data, - enum cryp_dma_req_type dma_req) -{ - CRYP_SET_BITS(&device_data->base->dmacr, - (u32) dma_req); -} - -/** - * cryp_configure_key_values - configures the key values for CRYP operations - * @device_data: Pointer to the device data struct for base address. - * @key_reg_index: Key value index register - * @key_value: The key value struct - */ -int cryp_configure_key_values(struct cryp_device_data *device_data, - enum cryp_key_reg_index key_reg_index, - struct cryp_key_value key_value) -{ - while (cryp_is_logic_busy(device_data)) - cpu_relax(); - - switch (key_reg_index) { - case CRYP_KEY_REG_1: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_1_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_1_r); - break; - case CRYP_KEY_REG_2: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_2_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_2_r); - break; - case CRYP_KEY_REG_3: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_3_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_3_r); - break; - case CRYP_KEY_REG_4: - writel_relaxed(key_value.key_value_left, - &device_data->base->key_4_l); - writel_relaxed(key_value.key_value_right, - &device_data->base->key_4_r); - break; - default: - return -EINVAL; - } - - return 0; -} - -/** - * cryp_configure_init_vector - configures the initialization vector register - * @device_data: Pointer to the device data struct for base address. - * @init_vector_index: Specifies the index of the init vector. - * @init_vector_value: Specifies the value for the init vector. - */ -int cryp_configure_init_vector(struct cryp_device_data *device_data, - enum cryp_init_vector_index - init_vector_index, - struct cryp_init_vector_value - init_vector_value) -{ - while (cryp_is_logic_busy(device_data)) - cpu_relax(); - - switch (init_vector_index) { - case CRYP_INIT_VECTOR_INDEX_0: - writel_relaxed(init_vector_value.init_value_left, - &device_data->base->init_vect_0_l); - writel_relaxed(init_vector_value.init_value_right, - &device_data->base->init_vect_0_r); - break; - case CRYP_INIT_VECTOR_INDEX_1: - writel_relaxed(init_vector_value.init_value_left, - &device_data->base->init_vect_1_l); - writel_relaxed(init_vector_value.init_value_right, - &device_data->base->init_vect_1_r); - break; - default: - return -EINVAL; - } - - return 0; -} - -/** - * cryp_save_device_context - Store hardware registers and - * other device context parameter - * @device_data: Pointer to the device data struct for base address. - * @ctx: Crypto device context - * @cryp_mode: Mode: Polling, Interrupt or DMA - */ -void cryp_save_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx, - int cryp_mode) -{ - enum cryp_algo_mode algomode; - struct cryp_register __iomem *src_reg = device_data->base; - struct cryp_config *config = - (struct cryp_config *)device_data->current_ctx; - - /* - * Always start by disable the hardware and wait for it to finish the - * ongoing calculations before trying to reprogram it. - */ - cryp_activity(device_data, CRYP_CRYPEN_DISABLE); - cryp_wait_until_done(device_data); - - if (cryp_mode == CRYP_MODE_DMA) - cryp_configure_for_dma(device_data, CRYP_DMA_DISABLE_BOTH); - - if (CRYP_TEST_BITS(&src_reg->sr, CRYP_SR_IFEM_MASK) == 0) - ctx->din = readl_relaxed(&src_reg->din); - - ctx->cr = readl_relaxed(&src_reg->cr) & CRYP_CR_CONTEXT_SAVE_MASK; - - switch (config->keysize) { - case CRYP_KEY_SIZE_256: - ctx->key_4_l = readl_relaxed(&src_reg->key_4_l); - ctx->key_4_r = readl_relaxed(&src_reg->key_4_r); - fallthrough; - - case CRYP_KEY_SIZE_192: - ctx->key_3_l = readl_relaxed(&src_reg->key_3_l); - ctx->key_3_r = readl_relaxed(&src_reg->key_3_r); - fallthrough; - - case CRYP_KEY_SIZE_128: - ctx->key_2_l = readl_relaxed(&src_reg->key_2_l); - ctx->key_2_r = readl_relaxed(&src_reg->key_2_r); - fallthrough; - - default: - ctx->key_1_l = readl_relaxed(&src_reg->key_1_l); - ctx->key_1_r = readl_relaxed(&src_reg->key_1_r); - } - - /* Save IV for CBC mode for both AES and DES. */ - algomode = ((ctx->cr & CRYP_CR_ALGOMODE_MASK) >> CRYP_CR_ALGOMODE_POS); - if (algomode == CRYP_ALGO_TDES_CBC || - algomode == CRYP_ALGO_DES_CBC || - algomode == CRYP_ALGO_AES_CBC) { - ctx->init_vect_0_l = readl_relaxed(&src_reg->init_vect_0_l); - ctx->init_vect_0_r = readl_relaxed(&src_reg->init_vect_0_r); - ctx->init_vect_1_l = readl_relaxed(&src_reg->init_vect_1_l); - ctx->init_vect_1_r = readl_relaxed(&src_reg->init_vect_1_r); - } -} - -/** - * cryp_restore_device_context - Restore hardware registers and - * other device context parameter - * @device_data: Pointer to the device data struct for base address. - * @ctx: Crypto device context - */ -void cryp_restore_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx) -{ - struct cryp_register __iomem *reg = device_data->base; - struct cryp_config *config = - (struct cryp_config *)device_data->current_ctx; - - /* - * Fall through for all items in switch statement. DES is captured in - * the default. - */ - switch (config->keysize) { - case CRYP_KEY_SIZE_256: - writel_relaxed(ctx->key_4_l, ®->key_4_l); - writel_relaxed(ctx->key_4_r, ®->key_4_r); - fallthrough; - - case CRYP_KEY_SIZE_192: - writel_relaxed(ctx->key_3_l, ®->key_3_l); - writel_relaxed(ctx->key_3_r, ®->key_3_r); - fallthrough; - - case CRYP_KEY_SIZE_128: - writel_relaxed(ctx->key_2_l, ®->key_2_l); - writel_relaxed(ctx->key_2_r, ®->key_2_r); - fallthrough; - - default: - writel_relaxed(ctx->key_1_l, ®->key_1_l); - writel_relaxed(ctx->key_1_r, ®->key_1_r); - } - - /* Restore IV for CBC mode for AES and DES. */ - if (config->algomode == CRYP_ALGO_TDES_CBC || - config->algomode == CRYP_ALGO_DES_CBC || - config->algomode == CRYP_ALGO_AES_CBC) { - writel_relaxed(ctx->init_vect_0_l, ®->init_vect_0_l); - writel_relaxed(ctx->init_vect_0_r, ®->init_vect_0_r); - writel_relaxed(ctx->init_vect_1_l, ®->init_vect_1_l); - writel_relaxed(ctx->init_vect_1_r, ®->init_vect_1_r); - } -} diff --git a/drivers/crypto/ux500/cryp/cryp.h b/drivers/crypto/ux500/cryp/cryp.h deleted file mode 100644 index 59e1557a620a99864c2ab09f880a98ded4cc5751..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/cryp.h +++ /dev/null @@ -1,315 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - */ - -#ifndef _CRYP_H_ -#define _CRYP_H_ - -#include -#include -#include -#include - -#define DEV_DBG_NAME "crypX crypX:" - -/* CRYP enable/disable */ -enum cryp_crypen { - CRYP_CRYPEN_DISABLE = 0, - CRYP_CRYPEN_ENABLE = 1 -}; - -/* CRYP Start Computation enable/disable */ -enum cryp_start { - CRYP_START_DISABLE = 0, - CRYP_START_ENABLE = 1 -}; - -/* CRYP Init Signal enable/disable */ -enum cryp_init { - CRYP_INIT_DISABLE = 0, - CRYP_INIT_ENABLE = 1 -}; - -/* Cryp State enable/disable */ -enum cryp_state { - CRYP_STATE_DISABLE = 0, - CRYP_STATE_ENABLE = 1 -}; - -/* Key preparation bit enable */ -enum cryp_key_prep { - KSE_DISABLED = 0, - KSE_ENABLED = 1 -}; - -/* Key size for AES */ -#define CRYP_KEY_SIZE_128 (0) -#define CRYP_KEY_SIZE_192 (1) -#define CRYP_KEY_SIZE_256 (2) - -/* AES modes */ -enum cryp_algo_mode { - CRYP_ALGO_TDES_ECB, - CRYP_ALGO_TDES_CBC, - CRYP_ALGO_DES_ECB, - CRYP_ALGO_DES_CBC, - CRYP_ALGO_AES_ECB, - CRYP_ALGO_AES_CBC, - CRYP_ALGO_AES_CTR, - CRYP_ALGO_AES_XTS -}; - -/* Cryp Encryption or Decryption */ -enum cryp_algorithm_dir { - CRYP_ALGORITHM_ENCRYPT, - CRYP_ALGORITHM_DECRYPT -}; - -/* Hardware access method */ -enum cryp_mode { - CRYP_MODE_POLLING, - CRYP_MODE_INTERRUPT, - CRYP_MODE_DMA -}; - -/** - * struct cryp_config - - * @keysize: Key size for AES - * @algomode: AES modes - * @algodir: Cryp Encryption or Decryption - * - * CRYP configuration structure to be passed to set configuration - */ -struct cryp_config { - int keysize; - enum cryp_algo_mode algomode; - enum cryp_algorithm_dir algodir; -}; - -/** - * struct cryp_protection_config - - * @privilege_access: Privileged cryp state enable/disable - * @secure_access: Secure cryp state enable/disable - * - * Protection configuration structure for setting privilage access - */ -struct cryp_protection_config { - enum cryp_state privilege_access; - enum cryp_state secure_access; -}; - -/* Cryp status */ -enum cryp_status_id { - CRYP_STATUS_BUSY = 0x10, - CRYP_STATUS_OUTPUT_FIFO_FULL = 0x08, - CRYP_STATUS_OUTPUT_FIFO_NOT_EMPTY = 0x04, - CRYP_STATUS_INPUT_FIFO_NOT_FULL = 0x02, - CRYP_STATUS_INPUT_FIFO_EMPTY = 0x01 -}; - -/* Cryp DMA interface */ -#define CRYP_DMA_TX_FIFO 0x08 -#define CRYP_DMA_RX_FIFO 0x10 - -enum cryp_dma_req_type { - CRYP_DMA_DISABLE_BOTH, - CRYP_DMA_ENABLE_IN_DATA, - CRYP_DMA_ENABLE_OUT_DATA, - CRYP_DMA_ENABLE_BOTH_DIRECTIONS -}; - -enum cryp_dma_channel { - CRYP_DMA_RX = 0, - CRYP_DMA_TX -}; - -/* Key registers */ -enum cryp_key_reg_index { - CRYP_KEY_REG_1, - CRYP_KEY_REG_2, - CRYP_KEY_REG_3, - CRYP_KEY_REG_4 -}; - -/* Key register left and right */ -struct cryp_key_value { - u32 key_value_left; - u32 key_value_right; -}; - -/* Cryp Initialization structure */ -enum cryp_init_vector_index { - CRYP_INIT_VECTOR_INDEX_0, - CRYP_INIT_VECTOR_INDEX_1 -}; - -/* struct cryp_init_vector_value - - * @init_value_left - * @init_value_right - * */ -struct cryp_init_vector_value { - u32 init_value_left; - u32 init_value_right; -}; - -/** - * struct cryp_device_context - structure for a cryp context. - * @cr: control register - * @dmacr: DMA control register - * @imsc: Interrupt mask set/clear register - * @key_1_l: Key 1l register - * @key_1_r: Key 1r register - * @key_2_l: Key 2l register - * @key_2_r: Key 2r register - * @key_3_l: Key 3l register - * @key_3_r: Key 3r register - * @key_4_l: Key 4l register - * @key_4_r: Key 4r register - * @init_vect_0_l: Initialization vector 0l register - * @init_vect_0_r: Initialization vector 0r register - * @init_vect_1_l: Initialization vector 1l register - * @init_vect_1_r: Initialization vector 0r register - * @din: Data in register - * @dout: Data out register - * - * CRYP power management specifc structure. - */ -struct cryp_device_context { - u32 cr; - u32 dmacr; - u32 imsc; - - u32 key_1_l; - u32 key_1_r; - u32 key_2_l; - u32 key_2_r; - u32 key_3_l; - u32 key_3_r; - u32 key_4_l; - u32 key_4_r; - - u32 init_vect_0_l; - u32 init_vect_0_r; - u32 init_vect_1_l; - u32 init_vect_1_r; - - u32 din; - u32 dout; -}; - -struct cryp_dma { - dma_cap_mask_t mask; - struct completion cryp_dma_complete; - struct dma_chan *chan_cryp2mem; - struct dma_chan *chan_mem2cryp; - struct stedma40_chan_cfg *cfg_cryp2mem; - struct stedma40_chan_cfg *cfg_mem2cryp; - int sg_src_len; - int sg_dst_len; - struct scatterlist *sg_src; - struct scatterlist *sg_dst; - int nents_src; - int nents_dst; -}; - -/** - * struct cryp_device_data - structure for a cryp device. - * @base: Pointer to virtual base address of the cryp device. - * @phybase: Pointer to physical memory location of the cryp device. - * @dev: Pointer to the devices dev structure. - * @clk: Pointer to the device's clock control. - * @irq: IRQ number - * @pwr_regulator: Pointer to the device's power control. - * @power_status: Current status of the power. - * @ctx_lock: Lock for current_ctx. - * @current_ctx: Pointer to the currently allocated context. - * @list_node: For inclusion into a klist. - * @dma: The dma structure holding channel configuration. - * @power_state: TRUE = power state on, FALSE = power state off. - * @power_state_spinlock: Spinlock for power_state. - * @restore_dev_ctx: TRUE = saved ctx, FALSE = no saved ctx. - */ -struct cryp_device_data { - struct cryp_register __iomem *base; - phys_addr_t phybase; - struct device *dev; - struct clk *clk; - int irq; - struct regulator *pwr_regulator; - int power_status; - spinlock_t ctx_lock; - struct cryp_ctx *current_ctx; - struct klist_node list_node; - struct cryp_dma dma; - bool power_state; - spinlock_t power_state_spinlock; - bool restore_dev_ctx; -}; - -void cryp_wait_until_done(struct cryp_device_data *device_data); - -/* Initialization functions */ - -int cryp_check(struct cryp_device_data *device_data); - -void cryp_activity(struct cryp_device_data *device_data, - enum cryp_crypen cryp_crypen); - -void cryp_flush_inoutfifo(struct cryp_device_data *device_data); - -int cryp_set_configuration(struct cryp_device_data *device_data, - struct cryp_config *cryp_config, - u32 *control_register); - -void cryp_configure_for_dma(struct cryp_device_data *device_data, - enum cryp_dma_req_type dma_req); - -int cryp_configure_key_values(struct cryp_device_data *device_data, - enum cryp_key_reg_index key_reg_index, - struct cryp_key_value key_value); - -int cryp_configure_init_vector(struct cryp_device_data *device_data, - enum cryp_init_vector_index - init_vector_index, - struct cryp_init_vector_value - init_vector_value); - -int cryp_configure_protection(struct cryp_device_data *device_data, - struct cryp_protection_config *p_protect_config); - -/* Power management funtions */ -void cryp_save_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx, - int cryp_mode); - -void cryp_restore_device_context(struct cryp_device_data *device_data, - struct cryp_device_context *ctx); - -/* Data transfer and status bits. */ -int cryp_is_logic_busy(struct cryp_device_data *device_data); - -int cryp_get_status(struct cryp_device_data *device_data); - -/** - * cryp_write_indata - This routine writes 32 bit data into the data input - * register of the cryptography IP. - * @device_data: Pointer to the device data struct for base address. - * @write_data: Data to write. - */ -int cryp_write_indata(struct cryp_device_data *device_data, u32 write_data); - -/** - * cryp_read_outdata - This routine reads the data from the data output - * register of the CRYP logic - * @device_data: Pointer to the device data struct for base address. - * @read_data: Read the data from the output FIFO. - */ -int cryp_read_outdata(struct cryp_device_data *device_data, u32 *read_data); - -#endif /* _CRYP_H_ */ diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c deleted file mode 100644 index 5a57c9afd8c886931d12ff454d4f700a0a320ab1..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/cryp_core.c +++ /dev/null @@ -1,1600 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Andreas Westin for ST-Ericsson. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include - -#include "cryp_p.h" -#include "cryp.h" - -#define CRYP_MAX_KEY_SIZE 32 -#define BYTES_PER_WORD 4 - -static int cryp_mode; -static atomic_t session_id; - -static struct stedma40_chan_cfg *mem_to_engine; -static struct stedma40_chan_cfg *engine_to_mem; - -/** - * struct cryp_driver_data - data specific to the driver. - * - * @device_list: A list of registered devices to choose from. - * @device_allocation: A semaphore initialized with number of devices. - */ -struct cryp_driver_data { - struct klist device_list; - struct semaphore device_allocation; -}; - -/** - * struct cryp_ctx - Crypto context - * @config: Crypto mode. - * @key: Key array. - * @keylen: Length of key. - * @iv: Pointer to initialization vector. - * @indata: Pointer to indata. - * @outdata: Pointer to outdata. - * @datalen: Length of indata. - * @outlen: Length of outdata. - * @blocksize: Size of blocks. - * @updated: Updated flag. - * @dev_ctx: Device dependent context. - * @device: Pointer to the device. - * @session_id: Atomic session ID. - */ -struct cryp_ctx { - struct cryp_config config; - u8 key[CRYP_MAX_KEY_SIZE]; - u32 keylen; - u8 *iv; - const u8 *indata; - u8 *outdata; - u32 datalen; - u32 outlen; - u32 blocksize; - u8 updated; - struct cryp_device_context dev_ctx; - struct cryp_device_data *device; - u32 session_id; -}; - -static struct cryp_driver_data driver_data; - -/** - * swap_bits_in_byte - mirror the bits in a byte - * @b: the byte to be mirrored - * - * The bits are swapped the following way: - * Byte b include bits 0-7, nibble 1 (n1) include bits 0-3 and - * nibble 2 (n2) bits 4-7. - * - * Nibble 1 (n1): - * (The "old" (moved) bit is replaced with a zero) - * 1. Move bit 6 and 7, 4 positions to the left. - * 2. Move bit 3 and 5, 2 positions to the left. - * 3. Move bit 1-4, 1 position to the left. - * - * Nibble 2 (n2): - * 1. Move bit 0 and 1, 4 positions to the right. - * 2. Move bit 2 and 4, 2 positions to the right. - * 3. Move bit 3-6, 1 position to the right. - * - * Combine the two nibbles to a complete and swapped byte. - */ - -static inline u8 swap_bits_in_byte(u8 b) -{ -#define R_SHIFT_4_MASK 0xc0 /* Bits 6 and 7, right shift 4 */ -#define R_SHIFT_2_MASK 0x28 /* (After right shift 4) Bits 3 and 5, - right shift 2 */ -#define R_SHIFT_1_MASK 0x1e /* (After right shift 2) Bits 1-4, - right shift 1 */ -#define L_SHIFT_4_MASK 0x03 /* Bits 0 and 1, left shift 4 */ -#define L_SHIFT_2_MASK 0x14 /* (After left shift 4) Bits 2 and 4, - left shift 2 */ -#define L_SHIFT_1_MASK 0x78 /* (After left shift 1) Bits 3-6, - left shift 1 */ - - u8 n1; - u8 n2; - - /* Swap most significant nibble */ - /* Right shift 4, bits 6 and 7 */ - n1 = ((b & R_SHIFT_4_MASK) >> 4) | (b & ~(R_SHIFT_4_MASK >> 4)); - /* Right shift 2, bits 3 and 5 */ - n1 = ((n1 & R_SHIFT_2_MASK) >> 2) | (n1 & ~(R_SHIFT_2_MASK >> 2)); - /* Right shift 1, bits 1-4 */ - n1 = (n1 & R_SHIFT_1_MASK) >> 1; - - /* Swap least significant nibble */ - /* Left shift 4, bits 0 and 1 */ - n2 = ((b & L_SHIFT_4_MASK) << 4) | (b & ~(L_SHIFT_4_MASK << 4)); - /* Left shift 2, bits 2 and 4 */ - n2 = ((n2 & L_SHIFT_2_MASK) << 2) | (n2 & ~(L_SHIFT_2_MASK << 2)); - /* Left shift 1, bits 3-6 */ - n2 = (n2 & L_SHIFT_1_MASK) << 1; - - return n1 | n2; -} - -static inline void swap_words_in_key_and_bits_in_byte(const u8 *in, - u8 *out, u32 len) -{ - unsigned int i = 0; - int j; - int index = 0; - - j = len - BYTES_PER_WORD; - while (j >= 0) { - for (i = 0; i < BYTES_PER_WORD; i++) { - index = len - j - BYTES_PER_WORD + i; - out[j + i] = - swap_bits_in_byte(in[index]); - } - j -= BYTES_PER_WORD; - } -} - -static void add_session_id(struct cryp_ctx *ctx) -{ - /* - * We never want 0 to be a valid value, since this is the default value - * for the software context. - */ - if (unlikely(atomic_inc_and_test(&session_id))) - atomic_inc(&session_id); - - ctx->session_id = atomic_read(&session_id); -} - -static irqreturn_t cryp_interrupt_handler(int irq, void *param) -{ - struct cryp_ctx *ctx; - int count; - struct cryp_device_data *device_data; - - if (param == NULL) { - BUG_ON(!param); - return IRQ_HANDLED; - } - - /* The device is coming from the one found in hw_crypt_noxts. */ - device_data = (struct cryp_device_data *)param; - - ctx = device_data->current_ctx; - - if (ctx == NULL) { - BUG_ON(!ctx); - return IRQ_HANDLED; - } - - dev_dbg(ctx->device->dev, "[%s] (len: %d) %s, ", __func__, ctx->outlen, - cryp_pending_irq_src(device_data, CRYP_IRQ_SRC_OUTPUT_FIFO) ? - "out" : "in"); - - if (cryp_pending_irq_src(device_data, - CRYP_IRQ_SRC_OUTPUT_FIFO)) { - if (ctx->outlen / ctx->blocksize > 0) { - count = ctx->blocksize / 4; - - readsl(&device_data->base->dout, ctx->outdata, count); - ctx->outdata += count; - ctx->outlen -= count; - - if (ctx->outlen == 0) { - cryp_disable_irq_src(device_data, - CRYP_IRQ_SRC_OUTPUT_FIFO); - } - } - } else if (cryp_pending_irq_src(device_data, - CRYP_IRQ_SRC_INPUT_FIFO)) { - if (ctx->datalen / ctx->blocksize > 0) { - count = ctx->blocksize / 4; - - writesl(&device_data->base->din, ctx->indata, count); - - ctx->indata += count; - ctx->datalen -= count; - - if (ctx->datalen == 0) - cryp_disable_irq_src(device_data, - CRYP_IRQ_SRC_INPUT_FIFO); - - if (ctx->config.algomode == CRYP_ALGO_AES_XTS) { - CRYP_PUT_BITS(&device_data->base->cr, - CRYP_START_ENABLE, - CRYP_CR_START_POS, - CRYP_CR_START_MASK); - - cryp_wait_until_done(device_data); - } - } - } - - return IRQ_HANDLED; -} - -static int mode_is_aes(enum cryp_algo_mode mode) -{ - return CRYP_ALGO_AES_ECB == mode || - CRYP_ALGO_AES_CBC == mode || - CRYP_ALGO_AES_CTR == mode || - CRYP_ALGO_AES_XTS == mode; -} - -static int cfg_iv(struct cryp_device_data *device_data, u32 left, u32 right, - enum cryp_init_vector_index index) -{ - struct cryp_init_vector_value vector_value; - - dev_dbg(device_data->dev, "[%s]", __func__); - - vector_value.init_value_left = left; - vector_value.init_value_right = right; - - return cryp_configure_init_vector(device_data, - index, - vector_value); -} - -static int cfg_ivs(struct cryp_device_data *device_data, struct cryp_ctx *ctx) -{ - int i; - int status = 0; - int num_of_regs = ctx->blocksize / 8; - __be32 *civ = (__be32 *)ctx->iv; - u32 iv[AES_BLOCK_SIZE / 4]; - - dev_dbg(device_data->dev, "[%s]", __func__); - - /* - * Since we loop on num_of_regs we need to have a check in case - * someone provides an incorrect blocksize which would force calling - * cfg_iv with i greater than 2 which is an error. - */ - if (num_of_regs > 2) { - dev_err(device_data->dev, "[%s] Incorrect blocksize %d", - __func__, ctx->blocksize); - return -EINVAL; - } - - for (i = 0; i < ctx->blocksize / 4; i++) - iv[i] = be32_to_cpup(civ + i); - - for (i = 0; i < num_of_regs; i++) { - status = cfg_iv(device_data, iv[i*2], iv[i*2+1], - (enum cryp_init_vector_index) i); - if (status != 0) - return status; - } - return status; -} - -static int set_key(struct cryp_device_data *device_data, - u32 left_key, - u32 right_key, - enum cryp_key_reg_index index) -{ - struct cryp_key_value key_value; - int cryp_error; - - dev_dbg(device_data->dev, "[%s]", __func__); - - key_value.key_value_left = left_key; - key_value.key_value_right = right_key; - - cryp_error = cryp_configure_key_values(device_data, - index, - key_value); - if (cryp_error != 0) - dev_err(device_data->dev, "[%s]: " - "cryp_configure_key_values() failed!", __func__); - - return cryp_error; -} - -static int cfg_keys(struct cryp_ctx *ctx) -{ - int i; - int num_of_regs = ctx->keylen / 8; - u32 swapped_key[CRYP_MAX_KEY_SIZE / 4]; - __be32 *ckey = (__be32 *)ctx->key; - int cryp_error = 0; - - dev_dbg(ctx->device->dev, "[%s]", __func__); - - if (mode_is_aes(ctx->config.algomode)) { - swap_words_in_key_and_bits_in_byte((u8 *)ckey, - (u8 *)swapped_key, - ctx->keylen); - } else { - for (i = 0; i < ctx->keylen / 4; i++) - swapped_key[i] = be32_to_cpup(ckey + i); - } - - for (i = 0; i < num_of_regs; i++) { - cryp_error = set_key(ctx->device, - swapped_key[i * 2], - swapped_key[i * 2 + 1], - (enum cryp_key_reg_index) i); - - if (cryp_error != 0) { - dev_err(ctx->device->dev, "[%s]: set_key() failed!", - __func__); - return cryp_error; - } - } - return cryp_error; -} - -static int cryp_setup_context(struct cryp_ctx *ctx, - struct cryp_device_data *device_data) -{ - u32 control_register = CRYP_CR_DEFAULT; - - switch (cryp_mode) { - case CRYP_MODE_INTERRUPT: - writel_relaxed(CRYP_IMSC_DEFAULT, &device_data->base->imsc); - break; - - case CRYP_MODE_DMA: - writel_relaxed(CRYP_DMACR_DEFAULT, &device_data->base->dmacr); - break; - - default: - break; - } - - if (ctx->updated == 0) { - cryp_flush_inoutfifo(device_data); - if (cfg_keys(ctx) != 0) { - dev_err(ctx->device->dev, "[%s]: cfg_keys failed!", - __func__); - return -EINVAL; - } - - if (ctx->iv && - CRYP_ALGO_AES_ECB != ctx->config.algomode && - CRYP_ALGO_DES_ECB != ctx->config.algomode && - CRYP_ALGO_TDES_ECB != ctx->config.algomode) { - if (cfg_ivs(device_data, ctx) != 0) - return -EPERM; - } - - cryp_set_configuration(device_data, &ctx->config, - &control_register); - add_session_id(ctx); - } else if (ctx->updated == 1 && - ctx->session_id != atomic_read(&session_id)) { - cryp_flush_inoutfifo(device_data); - cryp_restore_device_context(device_data, &ctx->dev_ctx); - - add_session_id(ctx); - control_register = ctx->dev_ctx.cr; - } else - control_register = ctx->dev_ctx.cr; - - writel(control_register | - (CRYP_CRYPEN_ENABLE << CRYP_CR_CRYPEN_POS), - &device_data->base->cr); - - return 0; -} - -static int cryp_get_device_data(struct cryp_ctx *ctx, - struct cryp_device_data **device_data) -{ - int ret; - struct klist_iter device_iterator; - struct klist_node *device_node; - struct cryp_device_data *local_device_data = NULL; - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - /* Wait until a device is available */ - ret = down_interruptible(&driver_data.device_allocation); - if (ret) - return ret; /* Interrupted */ - - /* Select a device */ - klist_iter_init(&driver_data.device_list, &device_iterator); - - device_node = klist_next(&device_iterator); - while (device_node) { - local_device_data = container_of(device_node, - struct cryp_device_data, list_node); - spin_lock(&local_device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (local_device_data->current_ctx) { - device_node = klist_next(&device_iterator); - } else { - local_device_data->current_ctx = ctx; - ctx->device = local_device_data; - spin_unlock(&local_device_data->ctx_lock); - break; - } - spin_unlock(&local_device_data->ctx_lock); - } - klist_iter_exit(&device_iterator); - - if (!device_node) { - /** - * No free device found. - * Since we allocated a device with down_interruptible, this - * should not be able to happen. - * Number of available devices, which are contained in - * device_allocation, is therefore decremented by not doing - * an up(device_allocation). - */ - return -EBUSY; - } - - *device_data = local_device_data; - - return 0; -} - -static void cryp_dma_setup_channel(struct cryp_device_data *device_data, - struct device *dev) -{ - struct dma_slave_config mem2cryp = { - .direction = DMA_MEM_TO_DEV, - .dst_addr = device_data->phybase + CRYP_DMA_TX_FIFO, - .dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, - .dst_maxburst = 4, - }; - struct dma_slave_config cryp2mem = { - .direction = DMA_DEV_TO_MEM, - .src_addr = device_data->phybase + CRYP_DMA_RX_FIFO, - .src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES, - .src_maxburst = 4, - }; - - dma_cap_zero(device_data->dma.mask); - dma_cap_set(DMA_SLAVE, device_data->dma.mask); - - device_data->dma.cfg_mem2cryp = mem_to_engine; - device_data->dma.chan_mem2cryp = - dma_request_channel(device_data->dma.mask, - stedma40_filter, - device_data->dma.cfg_mem2cryp); - - device_data->dma.cfg_cryp2mem = engine_to_mem; - device_data->dma.chan_cryp2mem = - dma_request_channel(device_data->dma.mask, - stedma40_filter, - device_data->dma.cfg_cryp2mem); - - dmaengine_slave_config(device_data->dma.chan_mem2cryp, &mem2cryp); - dmaengine_slave_config(device_data->dma.chan_cryp2mem, &cryp2mem); - - init_completion(&device_data->dma.cryp_dma_complete); -} - -static void cryp_dma_out_callback(void *data) -{ - struct cryp_ctx *ctx = (struct cryp_ctx *) data; - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - complete(&ctx->device->dma.cryp_dma_complete); -} - -static int cryp_set_dma_transfer(struct cryp_ctx *ctx, - struct scatterlist *sg, - int len, - enum dma_data_direction direction) -{ - struct dma_async_tx_descriptor *desc; - struct dma_chan *channel = NULL; - dma_cookie_t cookie; - - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - if (unlikely(!IS_ALIGNED((unsigned long)sg, 4))) { - dev_err(ctx->device->dev, "[%s]: Data in sg list isn't " - "aligned! Addr: 0x%08lx", __func__, (unsigned long)sg); - return -EFAULT; - } - - switch (direction) { - case DMA_TO_DEVICE: - channel = ctx->device->dma.chan_mem2cryp; - ctx->device->dma.sg_src = sg; - ctx->device->dma.sg_src_len = dma_map_sg(channel->device->dev, - ctx->device->dma.sg_src, - ctx->device->dma.nents_src, - direction); - - if (!ctx->device->dma.sg_src_len) { - dev_dbg(ctx->device->dev, - "[%s]: Could not map the sg list (TO_DEVICE)", - __func__); - return -EFAULT; - } - - dev_dbg(ctx->device->dev, "[%s]: Setting up DMA for buffer " - "(TO_DEVICE)", __func__); - - desc = dmaengine_prep_slave_sg(channel, - ctx->device->dma.sg_src, - ctx->device->dma.sg_src_len, - DMA_MEM_TO_DEV, DMA_CTRL_ACK); - break; - - case DMA_FROM_DEVICE: - channel = ctx->device->dma.chan_cryp2mem; - ctx->device->dma.sg_dst = sg; - ctx->device->dma.sg_dst_len = dma_map_sg(channel->device->dev, - ctx->device->dma.sg_dst, - ctx->device->dma.nents_dst, - direction); - - if (!ctx->device->dma.sg_dst_len) { - dev_dbg(ctx->device->dev, - "[%s]: Could not map the sg list (FROM_DEVICE)", - __func__); - return -EFAULT; - } - - dev_dbg(ctx->device->dev, "[%s]: Setting up DMA for buffer " - "(FROM_DEVICE)", __func__); - - desc = dmaengine_prep_slave_sg(channel, - ctx->device->dma.sg_dst, - ctx->device->dma.sg_dst_len, - DMA_DEV_TO_MEM, - DMA_CTRL_ACK | - DMA_PREP_INTERRUPT); - - desc->callback = cryp_dma_out_callback; - desc->callback_param = ctx; - break; - - default: - dev_dbg(ctx->device->dev, "[%s]: Invalid DMA direction", - __func__); - return -EFAULT; - } - - cookie = dmaengine_submit(desc); - if (dma_submit_error(cookie)) { - dev_dbg(ctx->device->dev, "[%s]: DMA submission failed\n", - __func__); - return cookie; - } - - dma_async_issue_pending(channel); - - return 0; -} - -static void cryp_dma_done(struct cryp_ctx *ctx) -{ - struct dma_chan *chan; - - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - chan = ctx->device->dma.chan_mem2cryp; - dmaengine_terminate_all(chan); - dma_unmap_sg(chan->device->dev, ctx->device->dma.sg_src, - ctx->device->dma.nents_src, DMA_TO_DEVICE); - - chan = ctx->device->dma.chan_cryp2mem; - dmaengine_terminate_all(chan); - dma_unmap_sg(chan->device->dev, ctx->device->dma.sg_dst, - ctx->device->dma.nents_dst, DMA_FROM_DEVICE); -} - -static int cryp_dma_write(struct cryp_ctx *ctx, struct scatterlist *sg, - int len) -{ - int error = cryp_set_dma_transfer(ctx, sg, len, DMA_TO_DEVICE); - dev_dbg(ctx->device->dev, "[%s]: ", __func__); - - if (error) { - dev_dbg(ctx->device->dev, "[%s]: cryp_set_dma_transfer() " - "failed", __func__); - return error; - } - - return len; -} - -static int cryp_dma_read(struct cryp_ctx *ctx, struct scatterlist *sg, int len) -{ - int error = cryp_set_dma_transfer(ctx, sg, len, DMA_FROM_DEVICE); - if (error) { - dev_dbg(ctx->device->dev, "[%s]: cryp_set_dma_transfer() " - "failed", __func__); - return error; - } - - return len; -} - -static void cryp_polling_mode(struct cryp_ctx *ctx, - struct cryp_device_data *device_data) -{ - int len = ctx->blocksize / BYTES_PER_WORD; - int remaining_length = ctx->datalen; - u32 *indata = (u32 *)ctx->indata; - u32 *outdata = (u32 *)ctx->outdata; - - while (remaining_length > 0) { - writesl(&device_data->base->din, indata, len); - indata += len; - remaining_length -= (len * BYTES_PER_WORD); - cryp_wait_until_done(device_data); - - readsl(&device_data->base->dout, outdata, len); - outdata += len; - cryp_wait_until_done(device_data); - } -} - -static int cryp_disable_power(struct device *dev, - struct cryp_device_data *device_data, - bool save_device_context) -{ - int ret = 0; - - dev_dbg(dev, "[%s]", __func__); - - spin_lock(&device_data->power_state_spinlock); - if (!device_data->power_state) - goto out; - - spin_lock(&device_data->ctx_lock); - if (save_device_context && device_data->current_ctx) { - cryp_save_device_context(device_data, - &device_data->current_ctx->dev_ctx, - cryp_mode); - device_data->restore_dev_ctx = true; - } - spin_unlock(&device_data->ctx_lock); - - clk_disable(device_data->clk); - ret = regulator_disable(device_data->pwr_regulator); - if (ret) - dev_err(dev, "[%s]: " - "regulator_disable() failed!", - __func__); - - device_data->power_state = false; - -out: - spin_unlock(&device_data->power_state_spinlock); - - return ret; -} - -static int cryp_enable_power( - struct device *dev, - struct cryp_device_data *device_data, - bool restore_device_context) -{ - int ret = 0; - - dev_dbg(dev, "[%s]", __func__); - - spin_lock(&device_data->power_state_spinlock); - if (!device_data->power_state) { - ret = regulator_enable(device_data->pwr_regulator); - if (ret) { - dev_err(dev, "[%s]: regulator_enable() failed!", - __func__); - goto out; - } - - ret = clk_enable(device_data->clk); - if (ret) { - dev_err(dev, "[%s]: clk_enable() failed!", - __func__); - regulator_disable(device_data->pwr_regulator); - goto out; - } - device_data->power_state = true; - } - - if (device_data->restore_dev_ctx) { - spin_lock(&device_data->ctx_lock); - if (restore_device_context && device_data->current_ctx) { - device_data->restore_dev_ctx = false; - cryp_restore_device_context(device_data, - &device_data->current_ctx->dev_ctx); - } - spin_unlock(&device_data->ctx_lock); - } -out: - spin_unlock(&device_data->power_state_spinlock); - - return ret; -} - -static int hw_crypt_noxts(struct cryp_ctx *ctx, - struct cryp_device_data *device_data) -{ - int ret = 0; - - const u8 *indata = ctx->indata; - u8 *outdata = ctx->outdata; - u32 datalen = ctx->datalen; - u32 outlen = datalen; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->outlen = ctx->datalen; - - if (unlikely(!IS_ALIGNED((unsigned long)indata, 4))) { - pr_debug(DEV_DBG_NAME " [%s]: Data isn't aligned! Addr: " - "0x%08lx", __func__, (unsigned long)indata); - return -EINVAL; - } - - ret = cryp_setup_context(ctx, device_data); - - if (ret) - goto out; - - if (cryp_mode == CRYP_MODE_INTERRUPT) { - cryp_enable_irq_src(device_data, CRYP_IRQ_SRC_INPUT_FIFO | - CRYP_IRQ_SRC_OUTPUT_FIFO); - - /* - * ctx->outlen is decremented in the cryp_interrupt_handler - * function. We had to add cpu_relax() (barrier) to make sure - * that gcc didn't optimze away this variable. - */ - while (ctx->outlen > 0) - cpu_relax(); - } else if (cryp_mode == CRYP_MODE_POLLING || - cryp_mode == CRYP_MODE_DMA) { - /* - * The reason for having DMA in this if case is that if we are - * running cryp_mode = 2, then we separate DMA routines for - * handling cipher/plaintext > blocksize, except when - * running the normal CRYPTO_ALG_TYPE_CIPHER, then we still use - * the polling mode. Overhead of doing DMA setup eats up the - * benefits using it. - */ - cryp_polling_mode(ctx, device_data); - } else { - dev_err(ctx->device->dev, "[%s]: Invalid operation mode!", - __func__); - ret = -EPERM; - goto out; - } - - cryp_save_device_context(device_data, &ctx->dev_ctx, cryp_mode); - ctx->updated = 1; - -out: - ctx->indata = indata; - ctx->outdata = outdata; - ctx->datalen = datalen; - ctx->outlen = outlen; - - return ret; -} - -static int get_nents(struct scatterlist *sg, int nbytes) -{ - int nents = 0; - - while (nbytes > 0) { - nbytes -= sg->length; - sg = sg_next(sg); - nents++; - } - - return nents; -} - -static int ablk_dma_crypt(struct skcipher_request *areq) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - struct cryp_device_data *device_data; - - int bytes_written = 0; - int bytes_read = 0; - int ret; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->datalen = areq->cryptlen; - ctx->outlen = areq->cryptlen; - - ret = cryp_get_device_data(ctx, &device_data); - if (ret) - return ret; - - ret = cryp_setup_context(ctx, device_data); - if (ret) - goto out; - - /* We have the device now, so store the nents in the dma struct. */ - ctx->device->dma.nents_src = get_nents(areq->src, ctx->datalen); - ctx->device->dma.nents_dst = get_nents(areq->dst, ctx->outlen); - - /* Enable DMA in- and output. */ - cryp_configure_for_dma(device_data, CRYP_DMA_ENABLE_BOTH_DIRECTIONS); - - bytes_written = cryp_dma_write(ctx, areq->src, ctx->datalen); - bytes_read = cryp_dma_read(ctx, areq->dst, bytes_written); - - wait_for_completion(&ctx->device->dma.cryp_dma_complete); - cryp_dma_done(ctx); - - cryp_save_device_context(device_data, &ctx->dev_ctx, cryp_mode); - ctx->updated = 1; - -out: - spin_lock(&device_data->ctx_lock); - device_data->current_ctx = NULL; - ctx->device = NULL; - spin_unlock(&device_data->ctx_lock); - - /* - * The down_interruptible part for this semaphore is called in - * cryp_get_device_data. - */ - up(&driver_data.device_allocation); - - if (unlikely(bytes_written != bytes_read)) - return -EPERM; - - return 0; -} - -static int ablk_crypt(struct skcipher_request *areq) -{ - struct skcipher_walk walk; - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - struct cryp_device_data *device_data; - unsigned long src_paddr; - unsigned long dst_paddr; - int ret; - int nbytes; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ret = cryp_get_device_data(ctx, &device_data); - if (ret) - goto out; - - ret = skcipher_walk_async(&walk, areq); - - if (ret) { - pr_err(DEV_DBG_NAME "[%s]: skcipher_walk_async() failed!", - __func__); - goto out; - } - - while ((nbytes = walk.nbytes) > 0) { - ctx->iv = walk.iv; - src_paddr = (page_to_phys(walk.src.phys.page) + walk.src.phys.offset); - ctx->indata = phys_to_virt(src_paddr); - - dst_paddr = (page_to_phys(walk.dst.phys.page) + walk.dst.phys.offset); - ctx->outdata = phys_to_virt(dst_paddr); - - ctx->datalen = nbytes - (nbytes % ctx->blocksize); - - ret = hw_crypt_noxts(ctx, device_data); - if (ret) - goto out; - - nbytes -= ctx->datalen; - ret = skcipher_walk_done(&walk, nbytes); - if (ret) - goto out; - } - -out: - /* Release the device */ - spin_lock(&device_data->ctx_lock); - device_data->current_ctx = NULL; - ctx->device = NULL; - spin_unlock(&device_data->ctx_lock); - - /* - * The down_interruptible part for this semaphore is called in - * cryp_get_device_data. - */ - up(&driver_data.device_allocation); - - return ret; -} - -static int aes_skcipher_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - switch (keylen) { - case AES_KEYSIZE_128: - ctx->config.keysize = CRYP_KEY_SIZE_128; - break; - - case AES_KEYSIZE_192: - ctx->config.keysize = CRYP_KEY_SIZE_192; - break; - - case AES_KEYSIZE_256: - ctx->config.keysize = CRYP_KEY_SIZE_256; - break; - - default: - pr_err(DEV_DBG_NAME "[%s]: Unknown keylen!", __func__); - return -EINVAL; - } - - memcpy(ctx->key, key, keylen); - ctx->keylen = keylen; - - ctx->updated = 0; - - return 0; -} - -static int des_skcipher_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - int err; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - err = verify_skcipher_des_key(cipher, key); - if (err) - return err; - - memcpy(ctx->key, key, keylen); - ctx->keylen = keylen; - - ctx->updated = 0; - return 0; -} - -static int des3_skcipher_setkey(struct crypto_skcipher *cipher, - const u8 *key, unsigned int keylen) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - int err; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - err = verify_skcipher_des3_key(cipher, key); - if (err) - return err; - - memcpy(ctx->key, key, keylen); - ctx->keylen = keylen; - - ctx->updated = 0; - return 0; -} - -static int cryp_blk_encrypt(struct skcipher_request *areq) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->config.algodir = CRYP_ALGORITHM_ENCRYPT; - - /* - * DMA does not work for DES due to a hw bug */ - if (cryp_mode == CRYP_MODE_DMA && mode_is_aes(ctx->config.algomode)) - return ablk_dma_crypt(areq); - - /* For everything except DMA, we run the non DMA version. */ - return ablk_crypt(areq); -} - -static int cryp_blk_decrypt(struct skcipher_request *areq) -{ - struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq); - struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher); - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - ctx->config.algodir = CRYP_ALGORITHM_DECRYPT; - - /* DMA does not work for DES due to a hw bug */ - if (cryp_mode == CRYP_MODE_DMA && mode_is_aes(ctx->config.algomode)) - return ablk_dma_crypt(areq); - - /* For everything except DMA, we run the non DMA version. */ - return ablk_crypt(areq); -} - -struct cryp_algo_template { - enum cryp_algo_mode algomode; - struct skcipher_alg skcipher; -}; - -static int cryp_init_tfm(struct crypto_skcipher *tfm) -{ - struct cryp_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_alg *alg = crypto_skcipher_alg(tfm); - struct cryp_algo_template *cryp_alg = container_of(alg, - struct cryp_algo_template, - skcipher); - - ctx->config.algomode = cryp_alg->algomode; - ctx->blocksize = crypto_skcipher_blocksize(tfm); - - return 0; -} - -static struct cryp_algo_template cryp_algs[] = { - { - .algomode = CRYP_ALGO_AES_ECB, - .skcipher = { - .base.cra_name = "ecb(aes)", - .base.cra_driver_name = "ecb-aes-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_AES_CBC, - .skcipher = { - .base.cra_name = "cbc(aes)", - .base.cra_driver_name = "cbc-aes-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - .ivsize = AES_BLOCK_SIZE, - } - }, - { - .algomode = CRYP_ALGO_AES_CTR, - .skcipher = { - .base.cra_name = "ctr(aes)", - .base.cra_driver_name = "ctr-aes-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = aes_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - .ivsize = AES_BLOCK_SIZE, - .chunksize = AES_BLOCK_SIZE, - } - }, - { - .algomode = CRYP_ALGO_DES_ECB, - .skcipher = { - .base.cra_name = "ecb(des)", - .base.cra_driver_name = "ecb-des-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_TDES_ECB, - .skcipher = { - .base.cra_name = "ecb(des3_ede)", - .base.cra_driver_name = "ecb-des3_ede-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_DES_CBC, - .skcipher = { - .base.cra_name = "cbc(des)", - .base.cra_driver_name = "cbc-des-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES_KEY_SIZE, - .max_keysize = DES_KEY_SIZE, - .setkey = des_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .ivsize = DES_BLOCK_SIZE, - .init = cryp_init_tfm, - } - }, - { - .algomode = CRYP_ALGO_TDES_CBC, - .skcipher = { - .base.cra_name = "cbc(des3_ede)", - .base.cra_driver_name = "cbc-des3_ede-ux500", - .base.cra_priority = 300, - .base.cra_flags = CRYPTO_ALG_ASYNC, - .base.cra_blocksize = DES3_EDE_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct cryp_ctx), - .base.cra_alignmask = 3, - .base.cra_module = THIS_MODULE, - - .min_keysize = DES3_EDE_KEY_SIZE, - .max_keysize = DES3_EDE_KEY_SIZE, - .setkey = des3_skcipher_setkey, - .encrypt = cryp_blk_encrypt, - .decrypt = cryp_blk_decrypt, - .ivsize = DES3_EDE_BLOCK_SIZE, - .init = cryp_init_tfm, - } - } -}; - -/** - * cryp_algs_register_all - - */ -static int cryp_algs_register_all(void) -{ - int ret; - int i; - int count; - - pr_debug("[%s]", __func__); - - for (i = 0; i < ARRAY_SIZE(cryp_algs); i++) { - ret = crypto_register_skcipher(&cryp_algs[i].skcipher); - if (ret) { - count = i; - pr_err("[%s] alg registration failed", - cryp_algs[i].skcipher.base.cra_driver_name); - goto unreg; - } - } - return 0; -unreg: - for (i = 0; i < count; i++) - crypto_unregister_skcipher(&cryp_algs[i].skcipher); - return ret; -} - -/** - * cryp_algs_unregister_all - - */ -static void cryp_algs_unregister_all(void) -{ - int i; - - pr_debug(DEV_DBG_NAME " [%s]", __func__); - - for (i = 0; i < ARRAY_SIZE(cryp_algs); i++) - crypto_unregister_skcipher(&cryp_algs[i].skcipher); -} - -static int ux500_cryp_probe(struct platform_device *pdev) -{ - int ret; - struct resource *res; - struct cryp_device_data *device_data; - struct cryp_protection_config prot = { - .privilege_access = CRYP_STATE_ENABLE - }; - struct device *dev = &pdev->dev; - - dev_dbg(dev, "[%s]", __func__); - device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL); - if (!device_data) { - ret = -ENOMEM; - goto out; - } - - device_data->dev = dev; - device_data->current_ctx = NULL; - - /* Grab the DMA configuration from platform data. */ - mem_to_engine = &((struct cryp_platform_data *) - dev->platform_data)->mem_to_engine; - engine_to_mem = &((struct cryp_platform_data *) - dev->platform_data)->engine_to_mem; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "[%s]: platform_get_resource() failed", - __func__); - ret = -ENODEV; - goto out; - } - - device_data->phybase = res->start; - device_data->base = devm_ioremap_resource(dev, res); - if (IS_ERR(device_data->base)) { - ret = PTR_ERR(device_data->base); - goto out; - } - - spin_lock_init(&device_data->ctx_lock); - spin_lock_init(&device_data->power_state_spinlock); - - /* Enable power for CRYP hardware block */ - device_data->pwr_regulator = regulator_get(&pdev->dev, "v-ape"); - if (IS_ERR(device_data->pwr_regulator)) { - dev_err(dev, "[%s]: could not get cryp regulator", __func__); - ret = PTR_ERR(device_data->pwr_regulator); - device_data->pwr_regulator = NULL; - goto out; - } - - /* Enable the clk for CRYP hardware block */ - device_data->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(device_data->clk)) { - dev_err(dev, "[%s]: clk_get() failed!", __func__); - ret = PTR_ERR(device_data->clk); - goto out_regulator; - } - - ret = clk_prepare(device_data->clk); - if (ret) { - dev_err(dev, "[%s]: clk_prepare() failed!", __func__); - goto out_regulator; - } - - /* Enable device power (and clock) */ - ret = cryp_enable_power(device_data->dev, device_data, false); - if (ret) { - dev_err(dev, "[%s]: cryp_enable_power() failed!", __func__); - goto out_clk_unprepare; - } - - if (cryp_check(device_data)) { - dev_err(dev, "[%s]: cryp_check() failed!", __func__); - ret = -EINVAL; - goto out_power; - } - - if (cryp_configure_protection(device_data, &prot)) { - dev_err(dev, "[%s]: cryp_configure_protection() failed!", - __func__); - ret = -EINVAL; - goto out_power; - } - - device_data->irq = platform_get_irq(pdev, 0); - if (device_data->irq <= 0) { - ret = device_data->irq ? device_data->irq : -ENXIO; - goto out_power; - } - - ret = devm_request_irq(&pdev->dev, device_data->irq, - cryp_interrupt_handler, 0, "cryp1", device_data); - if (ret) { - dev_err(dev, "[%s]: Unable to request IRQ", __func__); - goto out_power; - } - - if (cryp_mode == CRYP_MODE_DMA) - cryp_dma_setup_channel(device_data, dev); - - platform_set_drvdata(pdev, device_data); - - /* Put the new device into the device list... */ - klist_add_tail(&device_data->list_node, &driver_data.device_list); - - /* ... and signal that a new device is available. */ - up(&driver_data.device_allocation); - - atomic_set(&session_id, 1); - - ret = cryp_algs_register_all(); - if (ret) { - dev_err(dev, "[%s]: cryp_algs_register_all() failed!", - __func__); - goto out_power; - } - - dev_info(dev, "successfully registered\n"); - - return 0; - -out_power: - cryp_disable_power(device_data->dev, device_data, false); - -out_clk_unprepare: - clk_unprepare(device_data->clk); - -out_regulator: - regulator_put(device_data->pwr_regulator); - -out: - return ret; -} - -static int ux500_cryp_remove(struct platform_device *pdev) -{ - struct cryp_device_data *device_data; - - dev_dbg(&pdev->dev, "[%s]", __func__); - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(&pdev->dev, "[%s]: platform_get_drvdata() failed!", - __func__); - return -ENOMEM; - } - - /* Try to decrease the number of available devices. */ - if (down_trylock(&driver_data.device_allocation)) - return -EBUSY; - - /* Check that the device is free */ - spin_lock(&device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (device_data->current_ctx) { - /* The device is busy */ - spin_unlock(&device_data->ctx_lock); - /* Return the device to the pool. */ - up(&driver_data.device_allocation); - return -EBUSY; - } - - spin_unlock(&device_data->ctx_lock); - - /* Remove the device from the list */ - if (klist_node_attached(&device_data->list_node)) - klist_remove(&device_data->list_node); - - /* If this was the last device, remove the services */ - if (list_empty(&driver_data.device_list.k_list)) - cryp_algs_unregister_all(); - - if (cryp_disable_power(&pdev->dev, device_data, false)) - dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed", - __func__); - - clk_unprepare(device_data->clk); - regulator_put(device_data->pwr_regulator); - - return 0; -} - -static void ux500_cryp_shutdown(struct platform_device *pdev) -{ - struct cryp_device_data *device_data; - - dev_dbg(&pdev->dev, "[%s]", __func__); - - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(&pdev->dev, "[%s]: platform_get_drvdata() failed!", - __func__); - return; - } - - /* Check that the device is free */ - spin_lock(&device_data->ctx_lock); - /* current_ctx allocates a device, NULL = unallocated */ - if (!device_data->current_ctx) { - if (down_trylock(&driver_data.device_allocation)) - dev_dbg(&pdev->dev, "[%s]: Cryp still in use!" - "Shutting down anyway...", __func__); - /** - * (Allocate the device) - * Need to set this to non-null (dummy) value, - * to avoid usage if context switching. - */ - device_data->current_ctx++; - } - spin_unlock(&device_data->ctx_lock); - - /* Remove the device from the list */ - if (klist_node_attached(&device_data->list_node)) - klist_remove(&device_data->list_node); - - /* If this was the last device, remove the services */ - if (list_empty(&driver_data.device_list.k_list)) - cryp_algs_unregister_all(); - - if (cryp_disable_power(&pdev->dev, device_data, false)) - dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed", - __func__); - -} - -#ifdef CONFIG_PM_SLEEP -static int ux500_cryp_suspend(struct device *dev) -{ - int ret; - struct platform_device *pdev = to_platform_device(dev); - struct cryp_device_data *device_data; - struct cryp_ctx *temp_ctx = NULL; - - dev_dbg(dev, "[%s]", __func__); - - /* Handle state? */ - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(dev, "[%s]: platform_get_drvdata() failed!", __func__); - return -ENOMEM; - } - - disable_irq(device_data->irq); - - spin_lock(&device_data->ctx_lock); - if (!device_data->current_ctx) - device_data->current_ctx++; - spin_unlock(&device_data->ctx_lock); - - if (device_data->current_ctx == ++temp_ctx) { - if (down_interruptible(&driver_data.device_allocation)) - dev_dbg(dev, "[%s]: down_interruptible() failed", - __func__); - ret = cryp_disable_power(dev, device_data, false); - - } else - ret = cryp_disable_power(dev, device_data, true); - - if (ret) - dev_err(dev, "[%s]: cryp_disable_power()", __func__); - - return ret; -} - -static int ux500_cryp_resume(struct device *dev) -{ - int ret = 0; - struct platform_device *pdev = to_platform_device(dev); - struct cryp_device_data *device_data; - struct cryp_ctx *temp_ctx = NULL; - - dev_dbg(dev, "[%s]", __func__); - - device_data = platform_get_drvdata(pdev); - if (!device_data) { - dev_err(dev, "[%s]: platform_get_drvdata() failed!", __func__); - return -ENOMEM; - } - - spin_lock(&device_data->ctx_lock); - if (device_data->current_ctx == ++temp_ctx) - device_data->current_ctx = NULL; - spin_unlock(&device_data->ctx_lock); - - - if (!device_data->current_ctx) - up(&driver_data.device_allocation); - else - ret = cryp_enable_power(dev, device_data, true); - - if (ret) - dev_err(dev, "[%s]: cryp_enable_power() failed!", __func__); - else - enable_irq(device_data->irq); - - return ret; -} -#endif - -static SIMPLE_DEV_PM_OPS(ux500_cryp_pm, ux500_cryp_suspend, ux500_cryp_resume); - -static const struct of_device_id ux500_cryp_match[] = { - { .compatible = "stericsson,ux500-cryp" }, - { }, -}; -MODULE_DEVICE_TABLE(of, ux500_cryp_match); - -static struct platform_driver cryp_driver = { - .probe = ux500_cryp_probe, - .remove = ux500_cryp_remove, - .shutdown = ux500_cryp_shutdown, - .driver = { - .name = "cryp1", - .of_match_table = ux500_cryp_match, - .pm = &ux500_cryp_pm, - } -}; - -static int __init ux500_cryp_mod_init(void) -{ - pr_debug("[%s] is called!", __func__); - klist_init(&driver_data.device_list, NULL, NULL); - /* Initialize the semaphore to 0 devices (locked state) */ - sema_init(&driver_data.device_allocation, 0); - return platform_driver_register(&cryp_driver); -} - -static void __exit ux500_cryp_mod_fini(void) -{ - pr_debug("[%s] is called!", __func__); - platform_driver_unregister(&cryp_driver); -} - -module_init(ux500_cryp_mod_init); -module_exit(ux500_cryp_mod_fini); - -module_param(cryp_mode, int, 0); - -MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 CRYP crypto engine."); -MODULE_ALIAS_CRYPTO("aes-all"); -MODULE_ALIAS_CRYPTO("des-all"); - -MODULE_LICENSE("GPL"); diff --git a/drivers/crypto/ux500/cryp/cryp_irq.c b/drivers/crypto/ux500/cryp/cryp_irq.c deleted file mode 100644 index 6d2f07bec98a7e74beba3c0297de40fa11538094..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/cryp_irq.c +++ /dev/null @@ -1,45 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - */ - -#include -#include -#include - -#include "cryp.h" -#include "cryp_p.h" -#include "cryp_irq.h" -#include "cryp_irqp.h" - -void cryp_enable_irq_src(struct cryp_device_data *device_data, u32 irq_src) -{ - u32 i; - - dev_dbg(device_data->dev, "[%s]", __func__); - - i = readl_relaxed(&device_data->base->imsc); - i = i | irq_src; - writel_relaxed(i, &device_data->base->imsc); -} - -void cryp_disable_irq_src(struct cryp_device_data *device_data, u32 irq_src) -{ - u32 i; - - dev_dbg(device_data->dev, "[%s]", __func__); - - i = readl_relaxed(&device_data->base->imsc); - i = i & ~irq_src; - writel_relaxed(i, &device_data->base->imsc); -} - -bool cryp_pending_irq_src(struct cryp_device_data *device_data, u32 irq_src) -{ - return (readl_relaxed(&device_data->base->mis) & irq_src) > 0; -} diff --git a/drivers/crypto/ux500/cryp/cryp_irq.h b/drivers/crypto/ux500/cryp/cryp_irq.h deleted file mode 100644 index da90029ea141240bb5c57d98eb31c28043036c13..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/cryp_irq.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - */ - -#ifndef _CRYP_IRQ_H_ -#define _CRYP_IRQ_H_ - -#include "cryp.h" - -enum cryp_irq_src_id { - CRYP_IRQ_SRC_INPUT_FIFO = 0x1, - CRYP_IRQ_SRC_OUTPUT_FIFO = 0x2, - CRYP_IRQ_SRC_ALL = 0x3 -}; - -/* - * M0 Funtions - */ -void cryp_enable_irq_src(struct cryp_device_data *device_data, u32 irq_src); - -void cryp_disable_irq_src(struct cryp_device_data *device_data, u32 irq_src); - -bool cryp_pending_irq_src(struct cryp_device_data *device_data, u32 irq_src); - -#endif /* _CRYP_IRQ_H_ */ diff --git a/drivers/crypto/ux500/cryp/cryp_irqp.h b/drivers/crypto/ux500/cryp/cryp_irqp.h deleted file mode 100644 index 4981a3f461e5ed6f8bc0e1c378749ce5a857a956..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/cryp_irqp.h +++ /dev/null @@ -1,125 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - */ - -#ifndef __CRYP_IRQP_H_ -#define __CRYP_IRQP_H_ - -#include "cryp_irq.h" - -/* - * - * CRYP Registers - Offset mapping - * +-----------------+ - * 00h | CRYP_CR | Configuration register - * +-----------------+ - * 04h | CRYP_SR | Status register - * +-----------------+ - * 08h | CRYP_DIN | Data In register - * +-----------------+ - * 0ch | CRYP_DOUT | Data out register - * +-----------------+ - * 10h | CRYP_DMACR | DMA control register - * +-----------------+ - * 14h | CRYP_IMSC | IMSC - * +-----------------+ - * 18h | CRYP_RIS | Raw interrupt status - * +-----------------+ - * 1ch | CRYP_MIS | Masked interrupt status. - * +-----------------+ - * Key registers - * IVR registers - * Peripheral - * Cell IDs - * - * Refer data structure for other register map - */ - -/** - * struct cryp_register - * @cr - Configuration register - * @status - Status register - * @din - Data input register - * @din_size - Data input size register - * @dout - Data output register - * @dout_size - Data output size register - * @dmacr - Dma control register - * @imsc - Interrupt mask set/clear register - * @ris - Raw interrupt status - * @mis - Masked interrupt statu register - * @key_1_l - Key register 1 L - * @key_1_r - Key register 1 R - * @key_2_l - Key register 2 L - * @key_2_r - Key register 2 R - * @key_3_l - Key register 3 L - * @key_3_r - Key register 3 R - * @key_4_l - Key register 4 L - * @key_4_r - Key register 4 R - * @init_vect_0_l - init vector 0 L - * @init_vect_0_r - init vector 0 R - * @init_vect_1_l - init vector 1 L - * @init_vect_1_r - init vector 1 R - * @cryp_unused1 - unused registers - * @itcr - Integration test control register - * @itip - Integration test input register - * @itop - Integration test output register - * @cryp_unused2 - unused registers - * @periphId0 - FE0 CRYP Peripheral Identication Register - * @periphId1 - FE4 - * @periphId2 - FE8 - * @periphId3 - FEC - * @pcellId0 - FF0 CRYP PCell Identication Register - * @pcellId1 - FF4 - * @pcellId2 - FF8 - * @pcellId3 - FFC - */ -struct cryp_register { - u32 cr; /* Configuration register */ - u32 sr; /* Status register */ - u32 din; /* Data input register */ - u32 din_size; /* Data input size register */ - u32 dout; /* Data output register */ - u32 dout_size; /* Data output size register */ - u32 dmacr; /* Dma control register */ - u32 imsc; /* Interrupt mask set/clear register */ - u32 ris; /* Raw interrupt status */ - u32 mis; /* Masked interrupt statu register */ - - u32 key_1_l; /*Key register 1 L */ - u32 key_1_r; /*Key register 1 R */ - u32 key_2_l; /*Key register 2 L */ - u32 key_2_r; /*Key register 2 R */ - u32 key_3_l; /*Key register 3 L */ - u32 key_3_r; /*Key register 3 R */ - u32 key_4_l; /*Key register 4 L */ - u32 key_4_r; /*Key register 4 R */ - - u32 init_vect_0_l; /*init vector 0 L */ - u32 init_vect_0_r; /*init vector 0 R */ - u32 init_vect_1_l; /*init vector 1 L */ - u32 init_vect_1_r; /*init vector 1 R */ - - u32 cryp_unused1[(0x80 - 0x58) / sizeof(u32)]; /* unused registers */ - u32 itcr; /*Integration test control register */ - u32 itip; /*Integration test input register */ - u32 itop; /*Integration test output register */ - u32 cryp_unused2[(0xFE0 - 0x8C) / sizeof(u32)]; /* unused registers */ - - u32 periphId0; /* FE0 CRYP Peripheral Identication Register */ - u32 periphId1; /* FE4 */ - u32 periphId2; /* FE8 */ - u32 periphId3; /* FEC */ - - u32 pcellId0; /* FF0 CRYP PCell Identication Register */ - u32 pcellId1; /* FF4 */ - u32 pcellId2; /* FF8 */ - u32 pcellId3; /* FFC */ -}; - -#endif diff --git a/drivers/crypto/ux500/cryp/cryp_p.h b/drivers/crypto/ux500/cryp/cryp_p.h deleted file mode 100644 index 60b47fe4de35d9b29c3ed2a0e902254f6de1bd38..0000000000000000000000000000000000000000 --- a/drivers/crypto/ux500/cryp/cryp_p.h +++ /dev/null @@ -1,122 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) ST-Ericsson SA 2010 - * Author: Shujuan Chen for ST-Ericsson. - * Author: Jonas Linde for ST-Ericsson. - * Author: Joakim Bech for ST-Ericsson. - * Author: Berne Hebark for ST-Ericsson. - * Author: Niklas Hernaeus for ST-Ericsson. - */ - -#ifndef _CRYP_P_H_ -#define _CRYP_P_H_ - -#include -#include - -#include "cryp.h" -#include "cryp_irqp.h" - -/* - * Generic Macros - */ -#define CRYP_SET_BITS(reg_name, mask) \ - writel_relaxed((readl_relaxed(reg_name) | mask), reg_name) - -#define CRYP_WRITE_BIT(reg_name, val, mask) \ - writel_relaxed(((readl_relaxed(reg_name) & ~(mask)) |\ - ((val) & (mask))), reg_name) - -#define CRYP_TEST_BITS(reg_name, val) \ - (readl_relaxed(reg_name) & (val)) - -#define CRYP_PUT_BITS(reg, val, shift, mask) \ - writel_relaxed(((readl_relaxed(reg) & ~(mask)) | \ - (((u32)val << shift) & (mask))), reg) - -/* - * CRYP specific Macros - */ -#define CRYP_PERIPHERAL_ID0 0xE3 -#define CRYP_PERIPHERAL_ID1 0x05 - -#define CRYP_PERIPHERAL_ID2_DB8500 0x28 -#define CRYP_PERIPHERAL_ID3 0x00 - -#define CRYP_PCELL_ID0 0x0D -#define CRYP_PCELL_ID1 0xF0 -#define CRYP_PCELL_ID2 0x05 -#define CRYP_PCELL_ID3 0xB1 - -/* - * CRYP register default values - */ -#define MAX_DEVICE_SUPPORT 2 - -/* Priv set, keyrden set and datatype 8bits swapped set as default. */ -#define CRYP_CR_DEFAULT 0x0482 -#define CRYP_DMACR_DEFAULT 0x0 -#define CRYP_IMSC_DEFAULT 0x0 -#define CRYP_DIN_DEFAULT 0x0 -#define CRYP_DOUT_DEFAULT 0x0 -#define CRYP_KEY_DEFAULT 0x0 -#define CRYP_INIT_VECT_DEFAULT 0x0 - -/* - * CRYP Control register specific mask - */ -#define CRYP_CR_SECURE_MASK BIT(0) -#define CRYP_CR_PRLG_MASK BIT(1) -#define CRYP_CR_ALGODIR_MASK BIT(2) -#define CRYP_CR_ALGOMODE_MASK (BIT(5) | BIT(4) | BIT(3)) -#define CRYP_CR_DATATYPE_MASK (BIT(7) | BIT(6)) -#define CRYP_CR_KEYSIZE_MASK (BIT(9) | BIT(8)) -#define CRYP_CR_KEYRDEN_MASK BIT(10) -#define CRYP_CR_KSE_MASK BIT(11) -#define CRYP_CR_START_MASK BIT(12) -#define CRYP_CR_INIT_MASK BIT(13) -#define CRYP_CR_FFLUSH_MASK BIT(14) -#define CRYP_CR_CRYPEN_MASK BIT(15) -#define CRYP_CR_CONTEXT_SAVE_MASK (CRYP_CR_SECURE_MASK |\ - CRYP_CR_PRLG_MASK |\ - CRYP_CR_ALGODIR_MASK |\ - CRYP_CR_ALGOMODE_MASK |\ - CRYP_CR_KEYSIZE_MASK |\ - CRYP_CR_KEYRDEN_MASK |\ - CRYP_CR_DATATYPE_MASK) - - -#define CRYP_SR_INFIFO_READY_MASK (BIT(0) | BIT(1)) -#define CRYP_SR_IFEM_MASK BIT(0) -#define CRYP_SR_BUSY_MASK BIT(4) - -/* - * Bit position used while setting bits in register - */ -#define CRYP_CR_PRLG_POS 1 -#define CRYP_CR_ALGODIR_POS 2 -#define CRYP_CR_ALGOMODE_POS 3 -#define CRYP_CR_DATATYPE_POS 6 -#define CRYP_CR_KEYSIZE_POS 8 -#define CRYP_CR_KEYRDEN_POS 10 -#define CRYP_CR_KSE_POS 11 -#define CRYP_CR_START_POS 12 -#define CRYP_CR_INIT_POS 13 -#define CRYP_CR_CRYPEN_POS 15 - -#define CRYP_SR_BUSY_POS 4 - -/* - * CRYP PCRs------PC_NAND control register - * BIT_MASK - */ -#define CRYP_DMA_REQ_MASK (BIT(1) | BIT(0)) -#define CRYP_DMA_REQ_MASK_POS 0 - - -struct cryp_system_context { - /* CRYP Register structure */ - struct cryp_register *p_cryp_reg[MAX_DEVICE_SUPPORT]; -}; - -#endif diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c index 168195672e2e10bed4a18d13991de26ef6536125..b2979be613b8f5cb1f0d0194216b8d1c6ff00d5c 100644 --- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c +++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c @@ -479,6 +479,9 @@ static int virtio_crypto_rsa_init_tfm(struct crypto_akcipher *tfm) ctx->enginectx.op.prepare_request = NULL; ctx->enginectx.op.unprepare_request = NULL; + akcipher_set_reqsize(tfm, + sizeof(struct virtio_crypto_akcipher_request)); + return 0; } @@ -505,7 +508,6 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = { .max_size = virtio_crypto_rsa_max_size, .init = virtio_crypto_rsa_init_tfm, .exit = virtio_crypto_rsa_exit_tfm, - .reqsize = sizeof(struct virtio_crypto_akcipher_request), .base = { .cra_name = "rsa", .cra_driver_name = "virtio-crypto-rsa", @@ -528,7 +530,6 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = { .max_size = virtio_crypto_rsa_max_size, .init = virtio_crypto_rsa_init_tfm, .exit = virtio_crypto_rsa_exit_tfm, - .reqsize = sizeof(struct virtio_crypto_akcipher_request), .base = { .cra_name = "pkcs1pad(rsa,sha1)", .cra_driver_name = "virtio-pkcs1-rsa-with-sha1", diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c index e3885c90a3acba1b7cebbf895038893d097aa223..1c76aed8e2626c4b26a4276ef2b90c99d8451cec 100644 --- a/drivers/dma-buf/dma-resv.c +++ b/drivers/dma-buf/dma-resv.c @@ -98,12 +98,17 @@ static void dma_resv_list_set(struct dma_resv_list *list, static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences) { struct dma_resv_list *list; + size_t size; - list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL); + /* Round up to the next kmalloc bucket size. */ + size = kmalloc_size_roundup(struct_size(list, table, max_fences)); + + list = kmalloc(size, GFP_KERNEL); if (!list) return NULL; - list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) / + /* Given the resulting bucket size, recalculated max_fences. */ + list->max_fences = (size - offsetof(typeof(*list), table)) / sizeof(*list->table); return list; diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c index a9b96b18772f322ef4c74ba8acad04ae1f978c99..e13e92609943dbb91769a2c22b1bff57b228e2e6 100644 --- a/drivers/dma/idxd/cdev.c +++ b/drivers/dma/idxd/cdev.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -100,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp) filp->private_data = ctx; if (device_user_pasid_enabled(idxd)) { - sva = iommu_sva_bind_device(dev, current->mm, NULL); + sva = iommu_sva_bind_device(dev, current->mm); if (IS_ERR(sva)) { rc = PTR_ERR(sva); dev_err(dev, "pasid allocation failed: %d\n", rc); diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 09cbf0c179ba9e29df1b2d835fe3cd08b9141ea5..529ea09c909408952fe32777bb7c71c95a2506ce 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -502,29 +501,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d static int idxd_enable_system_pasid(struct idxd_device *idxd) { - int flags; - unsigned int pasid; - struct iommu_sva *sva; - - flags = SVM_FLAG_SUPERVISOR_MODE; - - sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags); - if (IS_ERR(sva)) { - dev_warn(&idxd->pdev->dev, - "iommu sva bind failed: %ld\n", PTR_ERR(sva)); - return PTR_ERR(sva); - } - - pasid = iommu_sva_get_pasid(sva); - if (pasid == IOMMU_PASID_INVALID) { - iommu_sva_unbind_device(sva); - return -ENODEV; - } - - idxd->sva = sva; - idxd->pasid = pasid; - dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid); - return 0; + return -EOPNOTSUPP; } static void idxd_disable_system_pasid(struct idxd_device *idxd) diff --git a/drivers/firmware/turris-mox-rwtm.c b/drivers/firmware/turris-mox-rwtm.c index c2d34dc8ba462828a428ee26899193c40b8c8889..6ea5789a89e2be9626928847456a3bc3b7cae47c 100644 --- a/drivers/firmware/turris-mox-rwtm.c +++ b/drivers/firmware/turris-mox-rwtm.c @@ -528,7 +528,6 @@ static int turris_mox_rwtm_probe(struct platform_device *pdev) rwtm->hwrng.name = DRIVER_NAME "_hwrng"; rwtm->hwrng.read = mox_hwrng_read; rwtm->hwrng.priv = (unsigned long) rwtm; - rwtm->hwrng.quality = 1024; ret = devm_hwrng_register(dev, &rwtm->hwrng); if (ret < 0) { diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a01af11806164c42680e2eb2a917fecca9a9f6f5..ec7cfd4f52b1eac199ffb9c58199ad6e64d6c138 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -109,6 +109,15 @@ config GPIO_REGMAP config GPIO_MAX730X tristate +config GPIO_IDIO_16 + tristate + help + Enables support for the idio-16 library functions. The idio-16 library + provides functions to facilitate communication with devices within the + ACCES IDIO-16 family such as the 104-IDIO-16 and the PCI-IDIO-16. + + If built as a module its name will be gpio-idio-16. + menu "Memory mapped GPIO drivers" depends on HAS_IOMEM @@ -219,7 +228,7 @@ config GPIO_CLPS711X Say yes here to support GPIO on CLPS711X SoCs. config GPIO_DAVINCI - bool "TI Davinci/Keystone GPIO support" + tristate "TI Davinci/Keystone GPIO support" default y if ARCH_DAVINCI depends on (ARM || ARM64) && (ARCH_DAVINCI || ARCH_KEYSTONE || ARCH_K3) help @@ -310,7 +319,7 @@ config GPIO_GRGPIO config GPIO_HISI tristate "HiSilicon GPIO controller driver" - depends on (ARM64 && ACPI) || COMPILE_TEST + depends on ARM64 || COMPILE_TEST select GPIO_GENERIC select GPIOLIB_IRQCHIP help @@ -600,14 +609,6 @@ config GPIO_SPRD help Say yes here to support Spreadtrum GPIO device. -config GPIO_STA2X11 - bool "STA2x11/ConneXt GPIO support" - depends on MFD_STA2X11 - select GENERIC_IRQ_CHIP - help - Say yes here to support the STA2x11/ConneXt GPIO device. - The GPIO module has 128 GPIO pins with alternate functions. - config GPIO_STP_XWAY bool "XWAY STP GPIOs" depends on SOC_XWAY || COMPILE_TEST @@ -857,6 +858,7 @@ config GPIO_104_IDIO_16 depends on PC104 select ISA_BUS_API select GPIOLIB_IRQCHIP + select GPIO_IDIO_16 help Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16, 104-IDIO-16E, 104-IDO-16, 104-IDIO-8, 104-IDIO-8E, 104-IDO-8). The @@ -1561,6 +1563,7 @@ config GPIO_PCH config GPIO_PCI_IDIO_16 tristate "ACCES PCI-IDIO-16 GPIO support" select GPIOLIB_IRQCHIP + select GPIO_IDIO_16 help Enables GPIO support for the ACCES PCI-IDIO-16. An interrupt is generated when any of the inputs change state (low to high or high to @@ -1681,6 +1684,12 @@ config GPIO_AGGREGATOR industrial control context, to be operated from userspace using the GPIO chardev interface. +config GPIO_LATCH + tristate "GPIO latch driver" + help + Say yes here to enable a driver for GPIO multiplexers based on latches + connected to other GPIOs. + config GPIO_MOCKUP tristate "GPIO Testing Driver" select IRQ_SIM diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 29e3beb6548cb0361979722158179c884b4bab5e..010587025fc87197fa837511ab646a088a343530 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_OF_GPIO) += gpiolib-of.o obj-$(CONFIG_GPIO_CDEV) += gpiolib-cdev.o obj-$(CONFIG_GPIO_SYSFS) += gpiolib-sysfs.o obj-$(CONFIG_GPIO_ACPI) += gpiolib-acpi.o +obj-$(CONFIG_GPIOLIB) += gpiolib-swnode.o # Device drivers. Generally keep list sorted alphabetically obj-$(CONFIG_GPIO_REGMAP) += gpio-regmap.o @@ -68,6 +69,7 @@ obj-$(CONFIG_GPIO_HLWD) += gpio-hlwd.o obj-$(CONFIG_HTC_EGPIO) += gpio-htc-egpio.o obj-$(CONFIG_GPIO_I8255) += gpio-i8255.o obj-$(CONFIG_GPIO_ICH) += gpio-ich.o +obj-$(CONFIG_GPIO_IDIO_16) += gpio-idio-16.o obj-$(CONFIG_GPIO_IDT3243X) += gpio-idt3243x.o obj-$(CONFIG_GPIO_IMX_SCU) += gpio-imx-scu.o obj-$(CONFIG_GPIO_IOP) += gpio-iop.o @@ -75,6 +77,7 @@ obj-$(CONFIG_GPIO_IT87) += gpio-it87.o obj-$(CONFIG_GPIO_IXP4XX) += gpio-ixp4xx.o obj-$(CONFIG_GPIO_JANZ_TTL) += gpio-janz-ttl.o obj-$(CONFIG_GPIO_KEMPLD) += gpio-kempld.o +obj-$(CONFIG_GPIO_LATCH) += gpio-latch.o obj-$(CONFIG_GPIO_LOGICVC) += gpio-logicvc.o obj-$(CONFIG_GPIO_LOONGSON1) += gpio-loongson1.o obj-$(CONFIG_GPIO_LOONGSON) += gpio-loongson.o @@ -140,7 +143,6 @@ obj-$(CONFIG_GPIO_SL28CPLD) += gpio-sl28cpld.o obj-$(CONFIG_GPIO_SODAVILLE) += gpio-sodaville.o obj-$(CONFIG_GPIO_SPEAR_SPICS) += gpio-spear-spics.o obj-$(CONFIG_GPIO_SPRD) += gpio-sprd.o -obj-$(CONFIG_GPIO_STA2X11) += gpio-sta2x11.o obj-$(CONFIG_GPIO_STMPE) += gpio-stmpe.o obj-$(CONFIG_GPIO_STP_XWAY) += gpio-stp-xway.o obj-$(CONFIG_GPIO_SYSCON) += gpio-syscon.o diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO index f87ff3fa8a531d27ac537d7f1a3eb4dd894dcad2..76560744587a5fff7235a689733bc72a9073fbbf 100644 --- a/drivers/gpio/TODO +++ b/drivers/gpio/TODO @@ -124,6 +124,13 @@ Work items: this with dry-coding and sending to maintainers to test +Generic regmap GPIO + +In the very similar way to Generic MMIO GPIO convert the users which can +take advantage of using regmap over direct IO accessors. Note, even in +MMIO case the regmap MMIO with gpio-regmap.c is preferable over gpio-mmio.c. + + GPIOLIB irqchip The GPIOLIB irqchip is a helper irqchip for "simple cases" that should diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c index 718bd54e2a25511da21604e0b3942d98fda278b0..098fbefdbe226891db337eefe77206344d5f1a8e 100644 --- a/drivers/gpio/gpio-104-idio-16.c +++ b/drivers/gpio/gpio-104-idio-16.c @@ -6,7 +6,7 @@ * This driver supports the following ACCES devices: 104-IDIO-16, * 104-IDIO-16E, 104-IDO-16, 104-IDIO-8, 104-IDIO-8E, and 104-IDO-8. */ -#include +#include #include #include #include @@ -21,6 +21,8 @@ #include #include +#include "gpio-idio-16.h" + #define IDIO_16_EXTENT 8 #define MAX_NUM_IDIO_16 max_num_isa_dev(IDIO_16_EXTENT) @@ -34,49 +36,26 @@ static unsigned int num_irq; module_param_hw_array(irq, uint, irq, &num_irq, 0); MODULE_PARM_DESC(irq, "ACCES 104-IDIO-16 interrupt line numbers"); -/** - * struct idio_16_reg - device registers structure - * @out0_7: Read: N/A - * Write: FET Drive Outputs 0-7 - * @in0_7: Read: Isolated Inputs 0-7 - * Write: Clear Interrupt - * @irq_ctl: Read: Enable IRQ - * Write: Disable IRQ - * @unused: N/A - * @out8_15: Read: N/A - * Write: FET Drive Outputs 8-15 - * @in8_15: Read: Isolated Inputs 8-15 - * Write: N/A - */ -struct idio_16_reg { - u8 out0_7; - u8 in0_7; - u8 irq_ctl; - u8 unused; - u8 out8_15; - u8 in8_15; -}; - /** * struct idio_16_gpio - GPIO device private data structure * @chip: instance of the gpio_chip * @lock: synchronization lock to prevent I/O race conditions * @irq_mask: I/O bits affected by interrupts * @reg: I/O address offset for the device registers - * @out_state: output bits state + * @state: ACCES IDIO-16 device state */ struct idio_16_gpio { struct gpio_chip chip; raw_spinlock_t lock; unsigned long irq_mask; - struct idio_16_reg __iomem *reg; - unsigned int out_state; + struct idio_16 __iomem *reg; + struct idio_16_state state; }; static int idio_16_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) { - if (offset > 15) + if (idio_16_get_direction(offset)) return GPIO_LINE_DIRECTION_IN; return GPIO_LINE_DIRECTION_OUT; @@ -98,15 +77,8 @@ static int idio_16_gpio_direction_output(struct gpio_chip *chip, static int idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - const unsigned int mask = BIT(offset-16); - if (offset < 16) - return -EINVAL; - - if (offset < 24) - return !!(ioread8(&idio16gpio->reg->in0_7) & mask); - - return !!(ioread8(&idio16gpio->reg->in8_15) & (mask>>8)); + return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset); } static int idio_16_gpio_get_multiple(struct gpio_chip *chip, @@ -114,11 +86,7 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip, { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - *bits = 0; - if (*mask & GENMASK(23, 16)) - *bits |= (unsigned long)ioread8(&idio16gpio->reg->in0_7) << 16; - if (*mask & GENMASK(31, 24)) - *bits |= (unsigned long)ioread8(&idio16gpio->reg->in8_15) << 24; + idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); return 0; } @@ -127,44 +95,16 @@ static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - const unsigned int mask = BIT(offset); - unsigned long flags; - if (offset > 15) - return; - - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - - if (value) - idio16gpio->out_state |= mask; - else - idio16gpio->out_state &= ~mask; - - if (offset > 7) - iowrite8(idio16gpio->out_state >> 8, &idio16gpio->reg->out8_15); - else - iowrite8(idio16gpio->out_state, &idio16gpio->reg->out0_7); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); + idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value); } static void idio_16_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long flags; - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - - idio16gpio->out_state &= ~*mask; - idio16gpio->out_state |= *mask & *bits; - - if (*mask & 0xFF) - iowrite8(idio16gpio->out_state, &idio16gpio->reg->out0_7); - if ((*mask >> 8) & 0xFF) - iowrite8(idio16gpio->out_state >> 8, &idio16gpio->reg->out8_15); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); + idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); } static void idio_16_irq_ack(struct irq_data *data) @@ -301,7 +241,10 @@ static int idio_16_probe(struct device *dev, unsigned int id) idio16gpio->chip.get_multiple = idio_16_gpio_get_multiple; idio16gpio->chip.set = idio_16_gpio_set; idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple; - idio16gpio->out_state = 0xFFFF; + + idio_16_state_init(&idio16gpio->state); + /* FET off states are represented by bit values of "1" */ + bitmap_fill(idio16gpio->state.out_state, IDIO_16_NOUT); girq = &idio16gpio->chip.irq; gpio_irq_chip_set_chip(girq, &idio_16_irqchip); @@ -343,3 +286,4 @@ module_isa_driver_with_irq(idio_16_driver, num_idio_16, num_irq); MODULE_AUTHOR("William Breathitt Gray "); MODULE_DESCRIPTION("ACCES 104-IDIO-16 GPIO driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(GPIO_IDIO_16); diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c index 0cb2664085cf83144a5a78253b255653d5a6a03a..6d17d262ad9170dc278fdc691b516b86d90f9a8f 100644 --- a/drivers/gpio/gpio-aggregator.c +++ b/drivers/gpio/gpio-aggregator.c @@ -23,6 +23,7 @@ #include #include +#define AGGREGATOR_MAX_GPIOS 512 /* * GPIO Aggregator sysfs interface @@ -64,7 +65,7 @@ static int aggr_parse(struct gpio_aggregator *aggr) unsigned int i, n = 0; int error = 0; - bitmap = bitmap_alloc(ARCH_NR_GPIOS, GFP_KERNEL); + bitmap = bitmap_alloc(AGGREGATOR_MAX_GPIOS, GFP_KERNEL); if (!bitmap) return -ENOMEM; @@ -84,13 +85,13 @@ static int aggr_parse(struct gpio_aggregator *aggr) } /* GPIO chip + offset(s) */ - error = bitmap_parselist(offsets, bitmap, ARCH_NR_GPIOS); + error = bitmap_parselist(offsets, bitmap, AGGREGATOR_MAX_GPIOS); if (error) { pr_err("Cannot parse %s: %d\n", offsets, error); goto free_bitmap; } - for_each_set_bit(i, bitmap, ARCH_NR_GPIOS) { + for_each_set_bit(i, bitmap, AGGREGATOR_MAX_GPIOS) { error = aggr_add_gpio(aggr, name, i, &n); if (error) goto free_bitmap; diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c index 59c4c48d8296b870fd7bd243eb5df4f74213914f..fa51a91afa54fd28dff649198bc8218cf9d6fc9f 100644 --- a/drivers/gpio/gpio-davinci.c +++ b/drivers/gpio/gpio-davinci.c @@ -217,9 +217,6 @@ static int davinci_gpio_probe(struct platform_device *pdev) return -EINVAL; } - if (WARN_ON(ARCH_NR_GPIOS < ngpio)) - ngpio = ARCH_NR_GPIOS; - /* * If there are unbanked interrupts then the number of * interrupts is equal to number of gpios else all are banked so @@ -730,3 +727,14 @@ static int __init davinci_gpio_drv_reg(void) return platform_driver_register(&davinci_gpio_driver); } postcore_initcall(davinci_gpio_drv_reg); + +static void __exit davinci_gpio_exit(void) +{ + platform_driver_unregister(&davinci_gpio_driver); +} +module_exit(davinci_gpio_exit); + +MODULE_AUTHOR("Jan Kotas "); +MODULE_DESCRIPTION("DAVINCI GPIO driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("platform:gpio-davinci"); diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c index 482f678c893e5ebed813508e4e6d4d030be33971..df1bdaae441c5e5ae50a223eebd3aa14a3293f90 100644 --- a/drivers/gpio/gpio-exar.c +++ b/drivers/gpio/gpio-exar.c @@ -141,6 +141,7 @@ static const struct regmap_config exar_regmap_config = { .name = "exar-gpio", .reg_bits = 16, .val_bits = 8, + .io_port = true, }; static int gpio_exar_probe(struct platform_device *pdev) diff --git a/drivers/gpio/gpio-ftgpio010.c b/drivers/gpio/gpio-ftgpio010.c index f77a965f5780d8d9817f2546df480e3341cb3b68..2728672ef9f839c9ed441f5a9c55b7b052f77120 100644 --- a/drivers/gpio/gpio-ftgpio010.c +++ b/drivers/gpio/gpio-ftgpio010.c @@ -277,7 +277,7 @@ static int ftgpio_gpio_probe(struct platform_device *pdev) dev_err(dev, "unable to init generic GPIO\n"); goto dis_clk; } - g->gc.label = "FTGPIO010"; + g->gc.label = dev_name(dev); g->gc.base = -1; g->gc.parent = dev; g->gc.owner = THIS_MODULE; diff --git a/drivers/gpio/gpio-gw-pld.c b/drivers/gpio/gpio-gw-pld.c index 2109803ffb386af227af4ca9613db2f359bf27e6..5057fa9ad6108a26f093f24eafbc51e91fd4b5b9 100644 --- a/drivers/gpio/gpio-gw-pld.c +++ b/drivers/gpio/gpio-gw-pld.c @@ -67,8 +67,7 @@ static void gw_pld_set8(struct gpio_chip *gc, unsigned offset, int value) gw_pld_output8(gc, offset, value); } -static int gw_pld_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int gw_pld_probe(struct i2c_client *client) { struct device *dev = &client->dev; struct gw_pld *gw; @@ -126,7 +125,7 @@ static struct i2c_driver gw_pld_driver = { .name = "gw_pld", .of_match_table = gw_pld_dt_ids, }, - .probe = gw_pld_probe, + .probe_new = gw_pld_probe, .id_table = gw_pld_id, }; module_i2c_driver(gw_pld_driver); diff --git a/drivers/gpio/gpio-hisi.c b/drivers/gpio/gpio-hisi.c index 3caabef5c7a2ea069a8f9d8dd18cae40d5b1b4c5..55bd69043bf429cd9a40b3509082af2c3d134ce2 100644 --- a/drivers/gpio/gpio-hisi.c +++ b/drivers/gpio/gpio-hisi.c @@ -221,6 +221,12 @@ static const struct acpi_device_id hisi_gpio_acpi_match[] = { }; MODULE_DEVICE_TABLE(acpi, hisi_gpio_acpi_match); +static const struct of_device_id hisi_gpio_dts_match[] = { + { .compatible = "hisilicon,ascend910-gpio", }, + { } +}; +MODULE_DEVICE_TABLE(of, hisi_gpio_dts_match); + static void hisi_gpio_get_pdata(struct device *dev, struct hisi_gpio *hisi_gpio) { @@ -311,6 +317,7 @@ static struct platform_driver hisi_gpio_driver = { .driver = { .name = HISI_GPIO_DRIVER_NAME, .acpi_match_table = hisi_gpio_acpi_match, + .of_match_table = hisi_gpio_dts_match, }, .probe = hisi_gpio_probe, }; diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c new file mode 100644 index 0000000000000000000000000000000000000000..13315242d2209e7487105cff001d21146b9dfbd1 --- /dev/null +++ b/drivers/gpio/gpio-idio-16.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPIO library for the ACCES IDIO-16 family + * Copyright (C) 2022 William Breathitt Gray + */ +#include +#include +#include +#include +#include +#include + +#include "gpio-idio-16.h" + +#define DEFAULT_SYMBOL_NAMESPACE GPIO_IDIO_16 + +/** + * idio_16_get - get signal value at signal offset + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @offset: offset of signal to get + * + * Returns the signal value (0=low, 1=high) for the signal at @offset. + */ +int idio_16_get(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, const unsigned long offset) +{ + const unsigned long mask = BIT(offset); + + if (offset < IDIO_16_NOUT) + return test_bit(offset, state->out_state); + + if (offset < 24) + return !!(ioread8(®->in0_7) & (mask >> IDIO_16_NOUT)); + + if (offset < 32) + return !!(ioread8(®->in8_15) & (mask >> 24)); + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(idio_16_get); + +/** + * idio_16_get_multiple - get multiple signal values at multiple signal offsets + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @mask: mask of signals to get + * @bits: bitmap to store signal values + * + * Stores in @bits the values (0=low, 1=high) for the signals defined by @mask. + */ +void idio_16_get_multiple(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, + const unsigned long *const mask, + unsigned long *const bits) +{ + unsigned long flags; + const unsigned long out_mask = GENMASK(IDIO_16_NOUT - 1, 0); + + spin_lock_irqsave(&state->lock, flags); + + bitmap_replace(bits, bits, state->out_state, &out_mask, IDIO_16_NOUT); + if (*mask & GENMASK(23, 16)) + bitmap_set_value8(bits, ioread8(®->in0_7), 16); + if (*mask & GENMASK(31, 24)) + bitmap_set_value8(bits, ioread8(®->in8_15), 24); + + spin_unlock_irqrestore(&state->lock, flags); +} +EXPORT_SYMBOL_GPL(idio_16_get_multiple); + +/** + * idio_16_set - set signal value at signal offset + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @offset: offset of signal to set + * @value: value of signal to set + * + * Assigns output @value for the signal at @offset. + */ +void idio_16_set(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, const unsigned long offset, + const unsigned long value) +{ + unsigned long flags; + + if (offset >= IDIO_16_NOUT) + return; + + spin_lock_irqsave(&state->lock, flags); + + __assign_bit(offset, state->out_state, value); + if (offset < 8) + iowrite8(bitmap_get_value8(state->out_state, 0), ®->out0_7); + else + iowrite8(bitmap_get_value8(state->out_state, 8), ®->out8_15); + + spin_unlock_irqrestore(&state->lock, flags); +} +EXPORT_SYMBOL_GPL(idio_16_set); + +/** + * idio_16_set_multiple - set signal values at multiple signal offsets + * @reg: ACCES IDIO-16 device registers + * @state: ACCES IDIO-16 device state + * @mask: mask of signals to set + * @bits: bitmap of signal output values + * + * Assigns output values defined by @bits for the signals defined by @mask. + */ +void idio_16_set_multiple(struct idio_16 __iomem *const reg, + struct idio_16_state *const state, + const unsigned long *const mask, + const unsigned long *const bits) +{ + unsigned long flags; + + spin_lock_irqsave(&state->lock, flags); + + bitmap_replace(state->out_state, state->out_state, bits, mask, + IDIO_16_NOUT); + if (*mask & GENMASK(7, 0)) + iowrite8(bitmap_get_value8(state->out_state, 0), ®->out0_7); + if (*mask & GENMASK(15, 8)) + iowrite8(bitmap_get_value8(state->out_state, 8), ®->out8_15); + + spin_unlock_irqrestore(&state->lock, flags); +} +EXPORT_SYMBOL_GPL(idio_16_set_multiple); + +/** + * idio_16_state_init - initialize idio_16_state structure + * @state: ACCES IDIO-16 device state + * + * Initializes the ACCES IDIO-16 device @state for use in idio-16 library + * functions. + */ +void idio_16_state_init(struct idio_16_state *const state) +{ + spin_lock_init(&state->lock); +} +EXPORT_SYMBOL_GPL(idio_16_state_init); + +MODULE_AUTHOR("William Breathitt Gray"); +MODULE_DESCRIPTION("ACCES IDIO-16 GPIO Library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/gpio/gpio-idio-16.h b/drivers/gpio/gpio-idio-16.h new file mode 100644 index 0000000000000000000000000000000000000000..928f8251a2bd9e206026ccef197b7d8c2b2ab2c8 --- /dev/null +++ b/drivers/gpio/gpio-idio-16.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright 2022 William Breathitt Gray */ +#ifndef _IDIO_16_H_ +#define _IDIO_16_H_ + +#include +#include + +/** + * struct idio_16 - IDIO-16 registers structure + * @out0_7: Read: FET Drive Outputs 0-7 + * Write: FET Drive Outputs 0-7 + * @in0_7: Read: Isolated Inputs 0-7 + * Write: Clear Interrupt + * @irq_ctl: Read: Enable IRQ + * Write: Disable IRQ + * @filter_ctl: Read: Activate Input Filters 0-15 + * Write: Deactivate Input Filters 0-15 + * @out8_15: Read: FET Drive Outputs 8-15 + * Write: FET Drive Outputs 8-15 + * @in8_15: Read: Isolated Inputs 8-15 + * Write: Unused + * @irq_status: Read: Interrupt status + * Write: Unused + */ +struct idio_16 { + u8 out0_7; + u8 in0_7; + u8 irq_ctl; + u8 filter_ctl; + u8 out8_15; + u8 in8_15; + u8 irq_status; +}; + +#define IDIO_16_NOUT 16 + +/** + * struct idio_16_state - IDIO-16 state structure + * @lock: synchronization lock for accessing device state + * @out_state: output signals state + */ +struct idio_16_state { + spinlock_t lock; + DECLARE_BITMAP(out_state, IDIO_16_NOUT); +}; + +/** + * idio_16_get_direction - get the I/O direction for a signal offset + * @offset: offset of signal to get direction + * + * Returns the signal direction (0=output, 1=input) for the signal at @offset. + */ +static inline int idio_16_get_direction(const unsigned long offset) +{ + return (offset >= IDIO_16_NOUT) ? 1 : 0; +} + +int idio_16_get(struct idio_16 __iomem *reg, struct idio_16_state *state, + unsigned long offset); +void idio_16_get_multiple(struct idio_16 __iomem *reg, + struct idio_16_state *state, + const unsigned long *mask, unsigned long *bits); +void idio_16_set(struct idio_16 __iomem *reg, struct idio_16_state *state, + unsigned long offset, unsigned long value); +void idio_16_set_multiple(struct idio_16 __iomem *reg, + struct idio_16_state *state, + const unsigned long *mask, const unsigned long *bits); +void idio_16_state_init(struct idio_16_state *state); + +#endif /* _IDIO_16_H_ */ diff --git a/drivers/gpio/gpio-latch.c b/drivers/gpio/gpio-latch.c new file mode 100644 index 0000000000000000000000000000000000000000..d7c3b20c8482ec2e9594a81e2f5254ed5ce1943e --- /dev/null +++ b/drivers/gpio/gpio-latch.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * GPIO latch driver + * + * Copyright (C) 2022 Sascha Hauer + * + * This driver implements a GPIO (or better GPO as there is no input) + * multiplexer based on latches like this: + * + * CLK0 ----------------------. ,--------. + * CLK1 -------------------. `--------|> #0 | + * | | | + * OUT0 ----------------+--|-----------|D0 Q0|-----|< + * OUT1 --------------+-|--|-----------|D1 Q1|-----|< + * OUT2 ------------+-|-|--|-----------|D2 Q2|-----|< + * OUT3 ----------+-|-|-|--|-----------|D3 Q3|-----|< + * OUT4 --------+-|-|-|-|--|-----------|D4 Q4|-----|< + * OUT5 ------+-|-|-|-|-|--|-----------|D5 Q5|-----|< + * OUT6 ----+-|-|-|-|-|-|--|-----------|D6 Q6|-----|< + * OUT7 --+-|-|-|-|-|-|-|--|-----------|D7 Q7|-----|< + * | | | | | | | | | `--------' + * | | | | | | | | | + * | | | | | | | | | ,--------. + * | | | | | | | | `-----------|> #1 | + * | | | | | | | | | | + * | | | | | | | `--------------|D0 Q0|-----|< + * | | | | | | `----------------|D1 Q1|-----|< + * | | | | | `------------------|D2 Q2|-----|< + * | | | | `--------------------|D3 Q3|-----|< + * | | | `----------------------|D4 Q4|-----|< + * | | `------------------------|D5 Q5|-----|< + * | `--------------------------|D6 Q6|-----|< + * `----------------------------|D7 Q7|-----|< + * `--------' + * + * The above is just an example. The actual number of number of latches and + * the number of inputs per latch is derived from the number of GPIOs given + * in the corresponding device tree properties. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "gpiolib.h" + +struct gpio_latch_priv { + struct gpio_chip gc; + struct gpio_descs *clk_gpios; + struct gpio_descs *latched_gpios; + int n_latched_gpios; + unsigned int setup_duration_ns; + unsigned int clock_duration_ns; + unsigned long *shadow; + /* + * Depending on whether any of the underlying GPIOs may sleep we either + * use a mutex or a spinlock to protect our shadow map. + */ + union { + struct mutex mutex; /* protects @shadow */ + spinlock_t spinlock; /* protects @shadow */ + }; +}; + +static int gpio_latch_get_direction(struct gpio_chip *gc, unsigned int offset) +{ + return GPIO_LINE_DIRECTION_OUT; +} + +static void gpio_latch_set_unlocked(struct gpio_latch_priv *priv, + void (*set)(struct gpio_desc *desc, int value), + unsigned int offset, bool val) +{ + int latch = offset / priv->n_latched_gpios; + int i; + + assign_bit(offset, priv->shadow, val); + + for (i = 0; i < priv->n_latched_gpios; i++) + set(priv->latched_gpios->desc[i], + test_bit(latch * priv->n_latched_gpios + i, priv->shadow)); + + ndelay(priv->setup_duration_ns); + set(priv->clk_gpios->desc[latch], 1); + ndelay(priv->clock_duration_ns); + set(priv->clk_gpios->desc[latch], 0); +} + +static void gpio_latch_set(struct gpio_chip *gc, unsigned int offset, int val) +{ + struct gpio_latch_priv *priv = gpiochip_get_data(gc); + unsigned long flags; + + spin_lock_irqsave(&priv->spinlock, flags); + + gpio_latch_set_unlocked(priv, gpiod_set_value, offset, val); + + spin_unlock_irqrestore(&priv->spinlock, flags); +} + +static void gpio_latch_set_can_sleep(struct gpio_chip *gc, unsigned int offset, int val) +{ + struct gpio_latch_priv *priv = gpiochip_get_data(gc); + + mutex_lock(&priv->mutex); + + gpio_latch_set_unlocked(priv, gpiod_set_value_cansleep, offset, val); + + mutex_unlock(&priv->mutex); +} + +static bool gpio_latch_can_sleep(struct gpio_latch_priv *priv, unsigned int n_latches) +{ + int i; + + for (i = 0; i < n_latches; i++) + if (gpiod_cansleep(priv->clk_gpios->desc[i])) + return true; + + for (i = 0; i < priv->n_latched_gpios; i++) + if (gpiod_cansleep(priv->latched_gpios->desc[i])) + return true; + + return false; +} + +/* + * Some value which is still acceptable to delay in atomic context. + * If we need to go higher we might have to switch to usleep_range(), + * but that cannot ne used in atomic context and the driver would have + * to be adjusted to support that. + */ +#define DURATION_NS_MAX 5000 + +static int gpio_latch_probe(struct platform_device *pdev) +{ + struct gpio_latch_priv *priv; + unsigned int n_latches; + struct device_node *np = pdev->dev.of_node; + + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->clk_gpios = devm_gpiod_get_array(&pdev->dev, "clk", GPIOD_OUT_LOW); + if (IS_ERR(priv->clk_gpios)) + return PTR_ERR(priv->clk_gpios); + + priv->latched_gpios = devm_gpiod_get_array(&pdev->dev, "latched", GPIOD_OUT_LOW); + if (IS_ERR(priv->latched_gpios)) + return PTR_ERR(priv->latched_gpios); + + n_latches = priv->clk_gpios->ndescs; + priv->n_latched_gpios = priv->latched_gpios->ndescs; + + priv->shadow = devm_bitmap_zalloc(&pdev->dev, n_latches * priv->n_latched_gpios, + GFP_KERNEL); + if (!priv->shadow) + return -ENOMEM; + + if (gpio_latch_can_sleep(priv, n_latches)) { + priv->gc.can_sleep = true; + priv->gc.set = gpio_latch_set_can_sleep; + mutex_init(&priv->mutex); + } else { + priv->gc.can_sleep = false; + priv->gc.set = gpio_latch_set; + spin_lock_init(&priv->spinlock); + } + + of_property_read_u32(np, "setup-duration-ns", &priv->setup_duration_ns); + if (priv->setup_duration_ns > DURATION_NS_MAX) { + dev_warn(&pdev->dev, "setup-duration-ns too high, limit to %d\n", + DURATION_NS_MAX); + priv->setup_duration_ns = DURATION_NS_MAX; + } + + of_property_read_u32(np, "clock-duration-ns", &priv->clock_duration_ns); + if (priv->clock_duration_ns > DURATION_NS_MAX) { + dev_warn(&pdev->dev, "clock-duration-ns too high, limit to %d\n", + DURATION_NS_MAX); + priv->clock_duration_ns = DURATION_NS_MAX; + } + + priv->gc.get_direction = gpio_latch_get_direction; + priv->gc.ngpio = n_latches * priv->n_latched_gpios; + priv->gc.owner = THIS_MODULE; + priv->gc.base = -1; + priv->gc.parent = &pdev->dev; + + platform_set_drvdata(pdev, priv); + + return devm_gpiochip_add_data(&pdev->dev, &priv->gc, priv); +} + +static const struct of_device_id gpio_latch_ids[] = { + { + .compatible = "gpio-latch", + }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, gpio_latch_ids); + +static struct platform_driver gpio_latch_driver = { + .driver = { + .name = "gpio-latch", + .of_match_table = gpio_latch_ids, + }, + .probe = gpio_latch_probe, +}; +module_platform_driver(gpio_latch_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Sascha Hauer "); +MODULE_DESCRIPTION("GPIO latch driver"); diff --git a/drivers/gpio/gpio-max7300.c b/drivers/gpio/gpio-max7300.c index 43da381a4d7e7ed322a25fdc5304b9014d0345ef..cf482f4f0098297a3ab59348082ee051634fdd37 100644 --- a/drivers/gpio/gpio-max7300.c +++ b/drivers/gpio/gpio-max7300.c @@ -28,8 +28,7 @@ static int max7300_i2c_read(struct device *dev, unsigned int reg) return i2c_smbus_read_byte_data(client, reg); } -static int max7300_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max7300_probe(struct i2c_client *client) { struct max7301 *ts; @@ -63,7 +62,7 @@ static struct i2c_driver max7300_driver = { .driver = { .name = "max7300", }, - .probe = max7300_probe, + .probe_new = max7300_probe, .remove = max7300_remove, .id_table = max7300_id, }; diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c index da69721170308e3629a59809f9427addc15bbaf8..68e982cdee734ddf17761793575979e8d7fcc7e2 100644 --- a/drivers/gpio/gpio-max732x.c +++ b/drivers/gpio/gpio-max732x.c @@ -608,9 +608,9 @@ static struct max732x_platform_data *of_gpio_max732x(struct device *dev) return pdata; } -static int max732x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int max732x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct max732x_platform_data *pdata; struct device_node *node; struct max732x_chip *chip; @@ -707,7 +707,7 @@ static struct i2c_driver max732x_driver = { .name = "max732x", .of_match_table = of_match_ptr(max732x_of_table), }, - .probe = max732x_probe, + .probe_new = max732x_probe, .id_table = max732x_id, }; diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c index 72ac09a597029ff87e7004c27a25020afa217b75..92ea8411050dda9ae5ed1c93338287443a1848e2 100644 --- a/drivers/gpio/gpio-merrifield.c +++ b/drivers/gpio/gpio-merrifield.c @@ -14,6 +14,7 @@ #include #include #include +#include #define GCCR 0x000 /* controller configuration */ #define GPLR 0x004 /* pin level r/o */ @@ -331,7 +332,7 @@ static int mrfld_irq_set_wake(struct irq_data *d, unsigned int on) raw_spin_unlock_irqrestore(&priv->lock, flags); - dev_dbg(priv->dev, "%sable wake for gpio %u\n", on ? "en" : "dis", gpio); + dev_dbg(priv->dev, "%s wake for gpio %u\n", str_enable_disable(on), gpio); return 0; } diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c index 6e67867e1dcd48ecaf919e44c6e6b8ed53649247..a59d61cd44b2e5eceac9bf97c41945ac77bde0f2 100644 --- a/drivers/gpio/gpio-pca953x.c +++ b/drivers/gpio/gpio-pca953x.c @@ -1050,9 +1050,9 @@ out: return ret; } -static int pca953x_probe(struct i2c_client *client, - const struct i2c_device_id *i2c_id) +static int pca953x_probe(struct i2c_client *client) { + const struct i2c_device_id *i2c_id = i2c_client_get_device_id(client); struct pca953x_platform_data *pdata; struct pca953x_chip *chip; int irq_base = 0; @@ -1376,7 +1376,7 @@ static struct i2c_driver pca953x_driver = { .of_match_table = pca953x_dt_ids, .acpi_match_table = pca953x_acpi_ids, }, - .probe = pca953x_probe, + .probe_new = pca953x_probe, .remove = pca953x_remove, .id_table = pca953x_id, }; diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c index ab2a652964ec2ff93e0cbe38cda455a749fb96eb..6c07a8811a7a5e740c9421504f1f781bd221c7a3 100644 --- a/drivers/gpio/gpio-pca9570.c +++ b/drivers/gpio/gpio-pca9570.c @@ -15,14 +15,28 @@ #include #include +#define SLG7XL45106_GPO_REG 0xDB + +/** + * struct pca9570_platform_data - GPIO platformdata + * @ngpio: no of gpios + * @command: Command to be sent + */ +struct pca9570_platform_data { + u16 ngpio; + u32 command; +}; + /** * struct pca9570 - GPIO driver data * @chip: GPIO controller chip + * @p_data: GPIO controller platform data * @lock: Protects write sequences * @out: Buffer for device register */ struct pca9570 { struct gpio_chip chip; + const struct pca9570_platform_data *p_data; struct mutex lock; u8 out; }; @@ -32,7 +46,11 @@ static int pca9570_read(struct pca9570 *gpio, u8 *value) struct i2c_client *client = to_i2c_client(gpio->chip.parent); int ret; - ret = i2c_smbus_read_byte(client); + if (gpio->p_data->command != 0) + ret = i2c_smbus_read_byte_data(client, gpio->p_data->command); + else + ret = i2c_smbus_read_byte(client); + if (ret < 0) return ret; @@ -44,6 +62,9 @@ static int pca9570_write(struct pca9570 *gpio, u8 value) { struct i2c_client *client = to_i2c_client(gpio->chip.parent); + if (gpio->p_data->command != 0) + return i2c_smbus_write_byte_data(client, gpio->p_data->command, value); + return i2c_smbus_write_byte(client, value); } @@ -106,7 +127,8 @@ static int pca9570_probe(struct i2c_client *client) gpio->chip.get = pca9570_get; gpio->chip.set = pca9570_set; gpio->chip.base = -1; - gpio->chip.ngpio = (uintptr_t)device_get_match_data(&client->dev); + gpio->p_data = device_get_match_data(&client->dev); + gpio->chip.ngpio = gpio->p_data->ngpio; gpio->chip.can_sleep = true; mutex_init(&gpio->lock); @@ -119,16 +141,31 @@ static int pca9570_probe(struct i2c_client *client) return devm_gpiochip_add_data(&client->dev, &gpio->chip, gpio); } +static const struct pca9570_platform_data pca9570_gpio = { + .ngpio = 4, +}; + +static const struct pca9570_platform_data pca9571_gpio = { + .ngpio = 8, +}; + +static const struct pca9570_platform_data slg7xl45106_gpio = { + .ngpio = 8, + .command = SLG7XL45106_GPO_REG, +}; + static const struct i2c_device_id pca9570_id_table[] = { - { "pca9570", 4 }, - { "pca9571", 8 }, + { "pca9570", (kernel_ulong_t)&pca9570_gpio}, + { "pca9571", (kernel_ulong_t)&pca9571_gpio }, + { "slg7xl45106", (kernel_ulong_t)&slg7xl45106_gpio }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(i2c, pca9570_id_table); static const struct of_device_id pca9570_of_match_table[] = { - { .compatible = "nxp,pca9570", .data = (void *)4 }, - { .compatible = "nxp,pca9571", .data = (void *)8 }, + { .compatible = "dlg,slg7xl45106", .data = &slg7xl45106_gpio}, + { .compatible = "nxp,pca9570", .data = &pca9570_gpio }, + { .compatible = "nxp,pca9571", .data = &pca9571_gpio }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, pca9570_of_match_table); diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c index e98ea47d7237985e51dda9016c2193f4ae58fc05..cec2f2c78255f5207de75d11b7a85ed8d69b01a1 100644 --- a/drivers/gpio/gpio-pcf857x.c +++ b/drivers/gpio/gpio-pcf857x.c @@ -247,9 +247,9 @@ static const struct irq_chip pcf857x_irq_chip = { /*-------------------------------------------------------------------------*/ -static int pcf857x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pcf857x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct pcf857x_platform_data *pdata = dev_get_platdata(&client->dev); struct device_node *np = client->dev.of_node; struct pcf857x *gpio; @@ -422,7 +422,7 @@ static struct i2c_driver pcf857x_driver = { .name = "pcf857x", .of_match_table = of_match_ptr(pcf857x_of_table), }, - .probe = pcf857x_probe, + .probe_new = pcf857x_probe, .remove = pcf857x_remove, .shutdown = pcf857x_shutdown, .id_table = pcf857x_id, diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c index 71a13a394050ffe65d1932e60e8d28ee9409af3c..a86ce748384bba0b0a9603566b068fe098f805a5 100644 --- a/drivers/gpio/gpio-pci-idio-16.c +++ b/drivers/gpio/gpio-pci-idio-16.c @@ -3,8 +3,7 @@ * GPIO driver for the ACCES PCI-IDIO-16 * Copyright (C) 2017 William Breathitt Gray */ -#include -#include +#include #include #include #include @@ -16,51 +15,28 @@ #include #include -/** - * struct idio_16_gpio_reg - GPIO device registers structure - * @out0_7: Read: FET Drive Outputs 0-7 - * Write: FET Drive Outputs 0-7 - * @in0_7: Read: Isolated Inputs 0-7 - * Write: Clear Interrupt - * @irq_ctl: Read: Enable IRQ - * Write: Disable IRQ - * @filter_ctl: Read: Activate Input Filters 0-15 - * Write: Deactivate Input Filters 0-15 - * @out8_15: Read: FET Drive Outputs 8-15 - * Write: FET Drive Outputs 8-15 - * @in8_15: Read: Isolated Inputs 8-15 - * Write: Unused - * @irq_status: Read: Interrupt status - * Write: Unused - */ -struct idio_16_gpio_reg { - u8 out0_7; - u8 in0_7; - u8 irq_ctl; - u8 filter_ctl; - u8 out8_15; - u8 in8_15; - u8 irq_status; -}; +#include "gpio-idio-16.h" /** * struct idio_16_gpio - GPIO device private data structure * @chip: instance of the gpio_chip * @lock: synchronization lock to prevent I/O race conditions * @reg: I/O address offset for the GPIO device registers + * @state: ACCES IDIO-16 device state * @irq_mask: I/O bits affected by interrupts */ struct idio_16_gpio { struct gpio_chip chip; raw_spinlock_t lock; - struct idio_16_gpio_reg __iomem *reg; + struct idio_16 __iomem *reg; + struct idio_16_state state; unsigned long irq_mask; }; static int idio_16_gpio_get_direction(struct gpio_chip *chip, unsigned int offset) { - if (offset > 15) + if (idio_16_get_direction(offset)) return GPIO_LINE_DIRECTION_IN; return GPIO_LINE_DIRECTION_OUT; @@ -82,43 +58,16 @@ static int idio_16_gpio_direction_output(struct gpio_chip *chip, static int idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long mask = BIT(offset); - - if (offset < 8) - return !!(ioread8(&idio16gpio->reg->out0_7) & mask); - - if (offset < 16) - return !!(ioread8(&idio16gpio->reg->out8_15) & (mask >> 8)); - - if (offset < 24) - return !!(ioread8(&idio16gpio->reg->in0_7) & (mask >> 16)); - return !!(ioread8(&idio16gpio->reg->in8_15) & (mask >> 24)); + return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset); } static int idio_16_gpio_get_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long offset; - unsigned long gpio_mask; - void __iomem *ports[] = { - &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, - &idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15, - }; - void __iomem *port_addr; - unsigned long port_state; - - /* clear bits array to a clean slate */ - bitmap_zero(bits, chip->ngpio); - - for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { - port_addr = ports[offset / 8]; - port_state = ioread8(port_addr) & gpio_mask; - - bitmap_set_value8(bits, port_state, offset); - } + idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); return 0; } @@ -126,61 +75,16 @@ static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset, int value) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned int mask = BIT(offset); - void __iomem *base; - unsigned long flags; - unsigned int out_state; - - if (offset > 15) - return; - - if (offset > 7) { - mask >>= 8; - base = &idio16gpio->reg->out8_15; - } else - base = &idio16gpio->reg->out0_7; - - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - if (value) - out_state = ioread8(base) | mask; - else - out_state = ioread8(base) & ~mask; - - iowrite8(out_state, base); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); + idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value); } static void idio_16_gpio_set_multiple(struct gpio_chip *chip, unsigned long *mask, unsigned long *bits) { struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip); - unsigned long offset; - unsigned long gpio_mask; - void __iomem *ports[] = { - &idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15, - }; - size_t index; - void __iomem *port_addr; - unsigned long bitmask; - unsigned long flags; - unsigned long out_state; - for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) { - index = offset / 8; - port_addr = ports[index]; - - bitmask = bitmap_get_value8(bits, offset) & gpio_mask; - - raw_spin_lock_irqsave(&idio16gpio->lock, flags); - - out_state = ioread8(port_addr) & ~gpio_mask; - out_state |= bitmask; - iowrite8(out_state, port_addr); - - raw_spin_unlock_irqrestore(&idio16gpio->lock, flags); - } + idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits); } static void idio_16_irq_ack(struct irq_data *data) @@ -335,6 +239,8 @@ static int idio_16_probe(struct pci_dev *pdev, const struct pci_device_id *id) idio16gpio->chip.set = idio_16_gpio_set; idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple; + idio_16_state_init(&idio16gpio->state); + girq = &idio16gpio->chip.irq; girq->chip = &idio_16_irqchip; /* This will let us handle the parent IRQ in the driver */ @@ -379,3 +285,4 @@ module_pci_driver(idio_16_driver); MODULE_AUTHOR("William Breathitt Gray "); MODULE_DESCRIPTION("ACCES PCI-IDIO-16 GPIO driver"); MODULE_LICENSE("GPL v2"); +MODULE_IMPORT_NS(GPIO_IDIO_16); diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c index 1020c2feb24964303c279fa352af7839991a8775..60514bc5454f8ceb258e0647910b91fd1642cd8e 100644 --- a/drivers/gpio/gpio-sim.c +++ b/drivers/gpio/gpio-sim.c @@ -31,6 +31,7 @@ #include "gpiolib.h" +#define GPIO_SIM_NGPIO_MAX 1024 #define GPIO_SIM_PROP_MAX 4 /* Max 3 properties + sentinel. */ #define GPIO_SIM_NUM_ATTRS 3 /* value, pull and sentinel */ @@ -371,6 +372,9 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev) if (ret) return ret; + if (num_lines > GPIO_SIM_NGPIO_MAX) + return -ERANGE; + ret = fwnode_property_read_string(swnode, "gpio-sim,label", &label); if (ret) { label = devm_kasprintf(dev, GFP_KERNEL, "%s-%s", diff --git a/drivers/gpio/gpio-sl28cpld.c b/drivers/gpio/gpio-sl28cpld.c index 52404736ac861bb16ea8e9bc7ef2bfb70f54bd56..2195f88c204855a6797841f13e6eb47cccac2859 100644 --- a/drivers/gpio/gpio-sl28cpld.c +++ b/drivers/gpio/gpio-sl28cpld.c @@ -70,8 +70,7 @@ static int sl28cpld_gpio_irq_init(struct platform_device *pdev, irq_chip->num_irqs = ARRAY_SIZE(sl28cpld_gpio_irqs); irq_chip->num_regs = 1; irq_chip->status_base = base + GPIO_REG_IP; - irq_chip->mask_base = base + GPIO_REG_IE; - irq_chip->mask_invert = true; + irq_chip->unmask_base = base + GPIO_REG_IE; irq_chip->ack_base = base + GPIO_REG_IP; ret = devm_regmap_add_irq_chip_fwnode(dev, dev_fwnode(dev), diff --git a/drivers/gpio/gpio-sta2x11.c b/drivers/gpio/gpio-sta2x11.c deleted file mode 100644 index e07cca0f8d35388dafd7a6374b95f054b07063cd..0000000000000000000000000000000000000000 --- a/drivers/gpio/gpio-sta2x11.c +++ /dev/null @@ -1,411 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * STMicroelectronics ConneXt (STA2X11) GPIO driver - * - * Copyright 2012 ST Microelectronics (Alessandro Rubini) - * Based on gpio-ml-ioh.c, Copyright 2010 OKI Semiconductors Ltd. - * Also based on previous sta2x11 work, Copyright 2011 Wind River Systems, Inc. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct gsta_regs { - u32 dat; /* 0x00 */ - u32 dats; - u32 datc; - u32 pdis; - u32 dir; /* 0x10 */ - u32 dirs; - u32 dirc; - u32 unused_1c; - u32 afsela; /* 0x20 */ - u32 unused_24[7]; - u32 rimsc; /* 0x40 */ - u32 fimsc; - u32 is; - u32 ic; -}; - -struct gsta_gpio { - spinlock_t lock; - struct device *dev; - void __iomem *reg_base; - struct gsta_regs __iomem *regs[GSTA_NR_BLOCKS]; - struct gpio_chip gpio; - int irq_base; - /* FIXME: save the whole config here (AF, ...) */ - unsigned irq_type[GSTA_NR_GPIO]; -}; - -/* - * gpio methods - */ - -static void gsta_gpio_set(struct gpio_chip *gpio, unsigned nr, int val) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - if (val) - writel(bit, ®s->dats); - else - writel(bit, ®s->datc); -} - -static int gsta_gpio_get(struct gpio_chip *gpio, unsigned nr) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - return !!(readl(®s->dat) & bit); -} - -static int gsta_gpio_direction_output(struct gpio_chip *gpio, unsigned nr, - int val) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - writel(bit, ®s->dirs); - /* Data register after direction, otherwise pullup/down is selected */ - if (val) - writel(bit, ®s->dats); - else - writel(bit, ®s->datc); - return 0; -} - -static int gsta_gpio_direction_input(struct gpio_chip *gpio, unsigned nr) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - - writel(bit, ®s->dirc); - return 0; -} - -static int gsta_gpio_to_irq(struct gpio_chip *gpio, unsigned offset) -{ - struct gsta_gpio *chip = gpiochip_get_data(gpio); - return chip->irq_base + offset; -} - -static void gsta_gpio_setup(struct gsta_gpio *chip) /* called from probe */ -{ - struct gpio_chip *gpio = &chip->gpio; - - /* - * ARCH_NR_GPIOS is currently 256 and dynamic allocation starts - * from the end. However, for compatibility, we need the first - * ConneXt device to start from gpio 0: it's the main chipset - * on most boards so documents and drivers assume gpio0..gpio127 - */ - static int gpio_base; - - gpio->label = dev_name(chip->dev); - gpio->owner = THIS_MODULE; - gpio->direction_input = gsta_gpio_direction_input; - gpio->get = gsta_gpio_get; - gpio->direction_output = gsta_gpio_direction_output; - gpio->set = gsta_gpio_set; - gpio->dbg_show = NULL; - gpio->base = gpio_base; - gpio->ngpio = GSTA_NR_GPIO; - gpio->can_sleep = false; - gpio->to_irq = gsta_gpio_to_irq; - - /* - * After the first device, turn to dynamic gpio numbers. - * For example, with ARCH_NR_GPIOS = 256 we can fit two cards - */ - if (!gpio_base) - gpio_base = -1; -} - -/* - * Special method: alternate functions and pullup/pulldown. This is only - * invoked on startup to configure gpio's according to platform data. - * FIXME : this functionality shall be managed (and exported to other drivers) - * via the pin control subsystem. - */ -static void gsta_set_config(struct gsta_gpio *chip, int nr, unsigned cfg) -{ - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - unsigned long flags; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - u32 val; - int err = 0; - - pr_info("%s: %p %i %i\n", __func__, chip, nr, cfg); - - if (cfg == PINMUX_TYPE_NONE) - return; - - /* Alternate function or not? */ - spin_lock_irqsave(&chip->lock, flags); - val = readl(®s->afsela); - if (cfg == PINMUX_TYPE_FUNCTION) - val |= bit; - else - val &= ~bit; - writel(val | bit, ®s->afsela); - if (cfg == PINMUX_TYPE_FUNCTION) { - spin_unlock_irqrestore(&chip->lock, flags); - return; - } - - /* not alternate function: set details */ - switch (cfg) { - case PINMUX_TYPE_OUTPUT_LOW: - writel(bit, ®s->dirs); - writel(bit, ®s->datc); - break; - case PINMUX_TYPE_OUTPUT_HIGH: - writel(bit, ®s->dirs); - writel(bit, ®s->dats); - break; - case PINMUX_TYPE_INPUT: - writel(bit, ®s->dirc); - val = readl(®s->pdis) | bit; - writel(val, ®s->pdis); - break; - case PINMUX_TYPE_INPUT_PULLUP: - writel(bit, ®s->dirc); - val = readl(®s->pdis) & ~bit; - writel(val, ®s->pdis); - writel(bit, ®s->dats); - break; - case PINMUX_TYPE_INPUT_PULLDOWN: - writel(bit, ®s->dirc); - val = readl(®s->pdis) & ~bit; - writel(val, ®s->pdis); - writel(bit, ®s->datc); - break; - default: - err = 1; - } - spin_unlock_irqrestore(&chip->lock, flags); - if (err) - pr_err("%s: chip %p, pin %i, cfg %i is invalid\n", - __func__, chip, nr, cfg); -} - -/* - * Irq methods - */ - -static void gsta_irq_disable(struct irq_data *data) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); - struct gsta_gpio *chip = gc->private; - int nr = data->irq - chip->irq_base; - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - u32 val; - unsigned long flags; - - spin_lock_irqsave(&chip->lock, flags); - if (chip->irq_type[nr] & IRQ_TYPE_EDGE_RISING) { - val = readl(®s->rimsc) & ~bit; - writel(val, ®s->rimsc); - } - if (chip->irq_type[nr] & IRQ_TYPE_EDGE_FALLING) { - val = readl(®s->fimsc) & ~bit; - writel(val, ®s->fimsc); - } - spin_unlock_irqrestore(&chip->lock, flags); - return; -} - -static void gsta_irq_enable(struct irq_data *data) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data); - struct gsta_gpio *chip = gc->private; - int nr = data->irq - chip->irq_base; - struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK]; - u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK); - u32 val; - int type; - unsigned long flags; - - type = chip->irq_type[nr]; - - spin_lock_irqsave(&chip->lock, flags); - val = readl(®s->rimsc); - if (type & IRQ_TYPE_EDGE_RISING) - writel(val | bit, ®s->rimsc); - else - writel(val & ~bit, ®s->rimsc); - val = readl(®s->rimsc); - if (type & IRQ_TYPE_EDGE_FALLING) - writel(val | bit, ®s->fimsc); - else - writel(val & ~bit, ®s->fimsc); - spin_unlock_irqrestore(&chip->lock, flags); - return; -} - -static int gsta_irq_type(struct irq_data *d, unsigned int type) -{ - struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d); - struct gsta_gpio *chip = gc->private; - int nr = d->irq - chip->irq_base; - - /* We only support edge interrupts */ - if (!(type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING))) { - pr_debug("%s: unsupported type 0x%x\n", __func__, type); - return -EINVAL; - } - - chip->irq_type[nr] = type; /* used for enable/disable */ - - gsta_irq_enable(d); - return 0; -} - -static irqreturn_t gsta_gpio_handler(int irq, void *dev_id) -{ - struct gsta_gpio *chip = dev_id; - struct gsta_regs __iomem *regs; - u32 is; - int i, nr, base; - irqreturn_t ret = IRQ_NONE; - - for (i = 0; i < GSTA_NR_BLOCKS; i++) { - regs = chip->regs[i]; - base = chip->irq_base + i * GSTA_GPIO_PER_BLOCK; - while ((is = readl(®s->is))) { - nr = __ffs(is); - irq = base + nr; - generic_handle_irq(irq); - writel(1 << nr, ®s->ic); - ret = IRQ_HANDLED; - } - } - return ret; -} - -static int gsta_alloc_irq_chip(struct gsta_gpio *chip) -{ - struct irq_chip_generic *gc; - struct irq_chip_type *ct; - int rv; - - gc = devm_irq_alloc_generic_chip(chip->dev, KBUILD_MODNAME, 1, - chip->irq_base, - chip->reg_base, handle_simple_irq); - if (!gc) - return -ENOMEM; - - gc->private = chip; - ct = gc->chip_types; - - ct->chip.irq_set_type = gsta_irq_type; - ct->chip.irq_disable = gsta_irq_disable; - ct->chip.irq_enable = gsta_irq_enable; - - /* FIXME: this makes at most 32 interrupts. Request 0 by now */ - rv = devm_irq_setup_generic_chip(chip->dev, gc, - 0 /* IRQ_MSK(GSTA_GPIO_PER_BLOCK) */, - 0, IRQ_NOREQUEST | IRQ_NOPROBE, 0); - if (rv) - return rv; - - /* Set up all 128 interrupts: code from setup_generic_chip */ - { - struct irq_chip_type *ct = gc->chip_types; - int i, j; - for (j = 0; j < GSTA_NR_GPIO; j++) { - i = chip->irq_base + j; - irq_set_chip_and_handler(i, &ct->chip, ct->handler); - irq_set_chip_data(i, gc); - irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE); - } - gc->irq_cnt = i - gc->irq_base; - } - - return 0; -} - -/* The platform device used here is instantiated by the MFD device */ -static int gsta_probe(struct platform_device *dev) -{ - int i, err; - struct pci_dev *pdev; - struct sta2x11_gpio_pdata *gpio_pdata; - struct gsta_gpio *chip; - - pdev = *(struct pci_dev **)dev_get_platdata(&dev->dev); - gpio_pdata = dev_get_platdata(&pdev->dev); - - if (gpio_pdata == NULL) - dev_err(&dev->dev, "no gpio config\n"); - pr_debug("gpio config: %p\n", gpio_pdata); - - chip = devm_kzalloc(&dev->dev, sizeof(*chip), GFP_KERNEL); - if (!chip) - return -ENOMEM; - chip->dev = &dev->dev; - chip->reg_base = devm_platform_ioremap_resource(dev, 0); - if (IS_ERR(chip->reg_base)) - return PTR_ERR(chip->reg_base); - - for (i = 0; i < GSTA_NR_BLOCKS; i++) { - chip->regs[i] = chip->reg_base + i * 4096; - /* disable all irqs */ - writel(0, &chip->regs[i]->rimsc); - writel(0, &chip->regs[i]->fimsc); - writel(~0, &chip->regs[i]->ic); - } - spin_lock_init(&chip->lock); - gsta_gpio_setup(chip); - if (gpio_pdata) - for (i = 0; i < GSTA_NR_GPIO; i++) - gsta_set_config(chip, i, gpio_pdata->pinconfig[i]); - - /* 384 was used in previous code: be compatible for other drivers */ - err = devm_irq_alloc_descs(&dev->dev, -1, 384, - GSTA_NR_GPIO, NUMA_NO_NODE); - if (err < 0) { - dev_warn(&dev->dev, "sta2x11 gpio: Can't get irq base (%i)\n", - -err); - return err; - } - chip->irq_base = err; - - err = gsta_alloc_irq_chip(chip); - if (err) - return err; - - err = devm_request_irq(&dev->dev, pdev->irq, gsta_gpio_handler, - IRQF_SHARED, KBUILD_MODNAME, chip); - if (err < 0) { - dev_err(&dev->dev, "sta2x11 gpio: Can't request irq (%i)\n", - -err); - return err; - } - - return devm_gpiochip_add_data(&dev->dev, &chip->gpio, chip); -} - -static struct platform_driver sta2x11_gpio_platform_driver = { - .driver = { - .name = "sta2x11-gpio", - .suppress_bind_attrs = true, - }, - .probe = gsta_probe, -}; -builtin_platform_driver(sta2x11_gpio_platform_driver); diff --git a/drivers/gpio/gpio-tpic2810.c b/drivers/gpio/gpio-tpic2810.c index d642c35cb97c0ce61bd40cc2a3c97ce7507e31c3..349c5fbd9b0274334dc549de78b0826a75d0a9f0 100644 --- a/drivers/gpio/gpio-tpic2810.c +++ b/drivers/gpio/gpio-tpic2810.c @@ -98,8 +98,7 @@ static const struct of_device_id tpic2810_of_match_table[] = { }; MODULE_DEVICE_TABLE(of, tpic2810_of_match_table); -static int tpic2810_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int tpic2810_probe(struct i2c_client *client) { struct tpic2810 *gpio; int ret; @@ -144,7 +143,7 @@ static struct i2c_driver tpic2810_driver = { .name = "tpic2810", .of_match_table = tpic2810_of_match_table, }, - .probe = tpic2810_probe, + .probe_new = tpic2810_probe, .remove = tpic2810_remove, .id_table = tpic2810_id_table, }; diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c index 416725c26e949e65a90dbb0bf6065c1a82107880..43e8b66e04f781deb7f466e299ead30e6f449a99 100644 --- a/drivers/gpio/gpio-ts4900.c +++ b/drivers/gpio/gpio-ts4900.c @@ -136,8 +136,7 @@ static const struct of_device_id ts4900_gpio_of_match_table[] = { }; MODULE_DEVICE_TABLE(of, ts4900_gpio_of_match_table); -static int ts4900_gpio_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int ts4900_gpio_probe(struct i2c_client *client) { struct ts4900_gpio_priv *priv; u32 ngpio; @@ -186,7 +185,7 @@ static struct i2c_driver ts4900_gpio_driver = { .name = "ts4900-gpio", .of_match_table = ts4900_gpio_of_match_table, }, - .probe = ts4900_gpio_probe, + .probe_new = ts4900_gpio_probe, .id_table = ts4900_gpio_id_table, }; module_i2c_driver(ts4900_gpio_driver); diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index a7d2358736fe761bfdd08bc30f9c8b611f80fdb2..bed0380c51360a4e5e8cbd15959f685b4d2922fd 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -89,6 +89,30 @@ struct acpi_gpio_chip { struct list_head deferred_req_irqs_list_entry; }; +/** + * struct acpi_gpio_info - ACPI GPIO specific information + * @adev: reference to ACPI device which consumes GPIO resource + * @flags: GPIO initialization flags + * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo + * @pin_config: pin bias as provided by ACPI + * @polarity: interrupt polarity as provided by ACPI + * @triggering: triggering type as provided by ACPI + * @wake_capable: wake capability as provided by ACPI + * @debounce: debounce timeout as provided by ACPI + * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping + */ +struct acpi_gpio_info { + struct acpi_device *adev; + enum gpiod_flags flags; + bool gpioint; + int pin_config; + int polarity; + int triggering; + bool wake_capable; + unsigned int debounce; + unsigned int quirks; +}; + /* * For GPIO chips which call acpi_gpiochip_request_interrupts() before late_init * (so builtin drivers) we register the ACPI GpioInt IRQ handlers from a @@ -512,7 +536,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip) if (ACPI_FAILURE(status)) return; - acpi_walk_resources(handle, "_AEI", + acpi_walk_resources(handle, METHOD_NAME__AEI, acpi_gpiochip_alloc_event, acpi_gpio); mutex_lock(&acpi_gpio_deferred_req_irqs_lock); @@ -670,8 +694,8 @@ __acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, enum gpiod_flags update) return ret; } -int -acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info) +static int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, + struct acpi_gpio_info *info) { struct device *dev = &info->adev->dev; enum gpiod_flags old = *flags; @@ -690,8 +714,8 @@ acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *inf return ret; } -int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, - struct acpi_gpio_info *info) +static int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, + struct acpi_gpio_info *info) { switch (info->pin_config) { case ACPI_PIN_CONFIG_PULLUP: @@ -864,8 +888,9 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode, * function only returns the first. */ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, - const char *propname, int index, - struct acpi_gpio_info *info) + const char *propname, + int index, + struct acpi_gpio_info *info) { struct acpi_gpio_lookup lookup; int ret; @@ -896,6 +921,44 @@ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev, return ret ? ERR_PTR(ret) : lookup.desc; } +/** + * acpi_get_gpiod_from_data() - get a GPIO descriptor from ACPI data node + * @fwnode: pointer to an ACPI firmware node to get the GPIO information from + * @propname: Property name of the GPIO + * @index: index of GpioIo/GpioInt resource (starting from %0) + * @info: info pointer to fill in (optional) + * + * This function uses the property-based GPIO lookup to get to the GPIO + * resource with the relevant information from a data-only ACPI firmware node + * and uses that to obtain the GPIO descriptor to return. + * + * If the GPIO cannot be translated or there is an error an ERR_PTR is + * returned. + */ +static struct gpio_desc *acpi_get_gpiod_from_data(struct fwnode_handle *fwnode, + const char *propname, + int index, + struct acpi_gpio_info *info) +{ + struct acpi_gpio_lookup lookup; + int ret; + + if (!is_acpi_data_node(fwnode)) + return ERR_PTR(-ENODEV); + + if (!propname) + return ERR_PTR(-EINVAL); + + lookup.index = index; + + ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); + if (ret) + return ERR_PTR(ret); + + ret = acpi_gpio_resource_lookup(&lookup, info); + return ret ? ERR_PTR(ret) : lookup.desc; +} + static bool acpi_can_fallback_to_crs(struct acpi_device *adev, const char *con_id) { @@ -906,13 +969,13 @@ static bool acpi_can_fallback_to_crs(struct acpi_device *adev, return con_id == NULL; } -struct gpio_desc *acpi_find_gpio(struct device *dev, +struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags) { - struct acpi_device *adev = ACPI_COMPANION(dev); + struct acpi_device *adev = to_acpi_device_node(fwnode); struct acpi_gpio_info info; struct gpio_desc *desc; char propname[32]; @@ -928,7 +991,12 @@ struct gpio_desc *acpi_find_gpio(struct device *dev, gpio_suffixes[i]); } - desc = acpi_get_gpiod_by_index(adev, propname, idx, &info); + if (adev) + desc = acpi_get_gpiod_by_index(adev, + propname, idx, &info); + else + desc = acpi_get_gpiod_from_data(fwnode, + propname, idx, &info); if (!IS_ERR(desc)) break; if (PTR_ERR(desc) == -EPROBE_DEFER) @@ -937,7 +1005,7 @@ struct gpio_desc *acpi_find_gpio(struct device *dev, /* Then from plain _CRS GPIOs */ if (IS_ERR(desc)) { - if (!acpi_can_fallback_to_crs(adev, con_id)) + if (!adev || !acpi_can_fallback_to_crs(adev, con_id)) return ERR_PTR(-ENOENT); desc = acpi_get_gpiod_by_index(adev, NULL, idx, &info); @@ -956,50 +1024,6 @@ struct gpio_desc *acpi_find_gpio(struct device *dev, return desc; } -/** - * acpi_node_get_gpiod() - get a GPIO descriptor from ACPI resources - * @fwnode: pointer to an ACPI firmware node to get the GPIO information from - * @propname: Property name of the GPIO - * @index: index of GpioIo/GpioInt resource (starting from %0) - * @info: info pointer to fill in (optional) - * - * If @fwnode is an ACPI device object, call acpi_get_gpiod_by_index() for it. - * Otherwise (i.e. it is a data-only non-device object), use the property-based - * GPIO lookup to get to the GPIO resource with the relevant information and use - * that to obtain the GPIO descriptor to return. - * - * If the GPIO cannot be translated or there is an error an ERR_PTR is - * returned. - */ -struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - struct acpi_gpio_info *info) -{ - struct acpi_gpio_lookup lookup; - struct acpi_device *adev; - int ret; - - adev = to_acpi_device_node(fwnode); - if (adev) - return acpi_get_gpiod_by_index(adev, propname, index, info); - - if (!is_acpi_data_node(fwnode)) - return ERR_PTR(-ENODEV); - - if (!propname) - return ERR_PTR(-EINVAL); - - memset(&lookup, 0, sizeof(lookup)); - lookup.index = index; - - ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup); - if (ret) - return ERR_PTR(ret); - - ret = acpi_gpio_resource_lookup(&lookup, info); - return ret ? ERR_PTR(ret) : lookup.desc; -} - /** * acpi_dev_gpio_irq_wake_get_by() - Find GpioInt and translate it to Linux IRQ number * @adev: pointer to a ACPI device to get IRQ from diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h index 01e0cb480a00cfba13c9a80fcbcbf36bd6316e4e..9475f99a9694148b0f22a761614fdab7de793053 100644 --- a/drivers/gpio/gpiolib-acpi.h +++ b/drivers/gpio/gpiolib-acpi.h @@ -22,30 +22,6 @@ struct gpio_chip; struct gpio_desc; struct gpio_device; -/** - * struct acpi_gpio_info - ACPI GPIO specific information - * @adev: reference to ACPI device which consumes GPIO resource - * @flags: GPIO initialization flags - * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo - * @pin_config: pin bias as provided by ACPI - * @polarity: interrupt polarity as provided by ACPI - * @triggering: triggering type as provided by ACPI - * @wake_capable: wake capability as provided by ACPI - * @debounce: debounce timeout as provided by ACPI - * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping - */ -struct acpi_gpio_info { - struct acpi_device *adev; - enum gpiod_flags flags; - bool gpioint; - int pin_config; - int polarity; - int triggering; - bool wake_capable; - unsigned int debounce; - unsigned int quirks; -}; - #ifdef CONFIG_ACPI void acpi_gpiochip_add(struct gpio_chip *chip); void acpi_gpiochip_remove(struct gpio_chip *chip); @@ -55,19 +31,11 @@ void acpi_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev); void acpi_gpiochip_request_interrupts(struct gpio_chip *chip); void acpi_gpiochip_free_interrupts(struct gpio_chip *chip); -int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, - struct acpi_gpio_info *info); -int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, - struct acpi_gpio_info *info); - -struct gpio_desc *acpi_find_gpio(struct device *dev, +struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags); -struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - struct acpi_gpio_info *info); int acpi_gpio_count(struct device *dev, const char *con_id); #else @@ -82,31 +50,13 @@ acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { } static inline void acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { } -static inline int -acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info) -{ - return 0; -} -static inline int -acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags, - struct acpi_gpio_info *info) -{ - return 0; -} - static inline struct gpio_desc * -acpi_find_gpio(struct device *dev, const char *con_id, +acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id, unsigned int idx, enum gpiod_flags *dflags, unsigned long *lookupflags) { return ERR_PTR(-ENOENT); } -static inline struct gpio_desc * -acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname, - int index, struct acpi_gpio_info *info) -{ - return ERR_PTR(-ENXIO); -} static inline int acpi_gpio_count(struct device *dev, const char *con_id) { return -ENODEV; diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c index 65b2c09a4576a1c6d680ba4c8849bc16a3caef16..e878e3f22b0e48e989c88d291f6f8de39980ba1a 100644 --- a/drivers/gpio/gpiolib-cdev.c +++ b/drivers/gpio/gpiolib-cdev.c @@ -57,6 +57,50 @@ static_assert(IS_ALIGNED(sizeof(struct gpio_v2_line_values), 8)); * interface to gpiolib GPIOs via ioctl()s. */ +typedef __poll_t (*poll_fn)(struct file *, struct poll_table_struct *); +typedef long (*ioctl_fn)(struct file *, unsigned int, unsigned long); +typedef ssize_t (*read_fn)(struct file *, char __user *, + size_t count, loff_t *); + +static __poll_t call_poll_locked(struct file *file, + struct poll_table_struct *wait, + struct gpio_device *gdev, poll_fn func) +{ + __poll_t ret; + + down_read(&gdev->sem); + ret = func(file, wait); + up_read(&gdev->sem); + + return ret; +} + +static long call_ioctl_locked(struct file *file, unsigned int cmd, + unsigned long arg, struct gpio_device *gdev, + ioctl_fn func) +{ + long ret; + + down_read(&gdev->sem); + ret = func(file, cmd, arg); + up_read(&gdev->sem); + + return ret; +} + +static ssize_t call_read_locked(struct file *file, char __user *buf, + size_t count, loff_t *f_ps, + struct gpio_device *gdev, read_fn func) +{ + ssize_t ret; + + down_read(&gdev->sem); + ret = func(file, buf, count, f_ps); + up_read(&gdev->sem); + + return ret; +} + /* * GPIO line handle management */ @@ -193,8 +237,8 @@ static long linehandle_set_config(struct linehandle_state *lh, return 0; } -static long linehandle_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long linehandle_ioctl_unlocked(struct file *file, unsigned int cmd, + unsigned long arg) { struct linehandle_state *lh = file->private_data; void __user *ip = (void __user *)arg; @@ -203,6 +247,9 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd, unsigned int i; int ret; + if (!lh->gdev->chip) + return -ENODEV; + switch (cmd) { case GPIOHANDLE_GET_LINE_VALUES_IOCTL: /* NOTE: It's okay to read values of output lines */ @@ -249,6 +296,15 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd, } } +static long linehandle_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct linehandle_state *lh = file->private_data; + + return call_ioctl_locked(file, cmd, arg, lh->gdev, + linehandle_ioctl_unlocked); +} + #ifdef CONFIG_COMPAT static long linehandle_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -412,7 +468,7 @@ out_free_lh: * @desc: the GPIO descriptor for this line. * @req: the corresponding line request * @irq: the interrupt triggered in response to events on this GPIO - * @eflags: the edge flags, GPIO_V2_LINE_FLAG_EDGE_RISING and/or + * @edflags: the edge flags, GPIO_V2_LINE_FLAG_EDGE_RISING and/or * GPIO_V2_LINE_FLAG_EDGE_FALLING, indicating the edge detection applied * @timestamp_ns: cache for the timestamp storing it between hardirq and * IRQ thread, used to bring the timestamp close to the actual event @@ -1380,12 +1436,15 @@ static long linereq_set_config(struct linereq *lr, void __user *ip) return ret; } -static long linereq_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long linereq_ioctl_unlocked(struct file *file, unsigned int cmd, + unsigned long arg) { struct linereq *lr = file->private_data; void __user *ip = (void __user *)arg; + if (!lr->gdev->chip) + return -ENODEV; + switch (cmd) { case GPIO_V2_LINE_GET_VALUES_IOCTL: return linereq_get_values(lr, ip); @@ -1398,6 +1457,15 @@ static long linereq_ioctl(struct file *file, unsigned int cmd, } } +static long linereq_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct linereq *lr = file->private_data; + + return call_ioctl_locked(file, cmd, arg, lr->gdev, + linereq_ioctl_unlocked); +} + #ifdef CONFIG_COMPAT static long linereq_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -1406,12 +1474,15 @@ static long linereq_ioctl_compat(struct file *file, unsigned int cmd, } #endif -static __poll_t linereq_poll(struct file *file, - struct poll_table_struct *wait) +static __poll_t linereq_poll_unlocked(struct file *file, + struct poll_table_struct *wait) { struct linereq *lr = file->private_data; __poll_t events = 0; + if (!lr->gdev->chip) + return EPOLLHUP | EPOLLERR; + poll_wait(file, &lr->wait, wait); if (!kfifo_is_empty_spinlocked_noirqsave(&lr->events, @@ -1421,16 +1492,25 @@ static __poll_t linereq_poll(struct file *file, return events; } -static ssize_t linereq_read(struct file *file, - char __user *buf, - size_t count, - loff_t *f_ps) +static __poll_t linereq_poll(struct file *file, + struct poll_table_struct *wait) +{ + struct linereq *lr = file->private_data; + + return call_poll_locked(file, wait, lr->gdev, linereq_poll_unlocked); +} + +static ssize_t linereq_read_unlocked(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) { struct linereq *lr = file->private_data; struct gpio_v2_line_event le; ssize_t bytes_read = 0; int ret; + if (!lr->gdev->chip) + return -ENODEV; + if (count < sizeof(le)) return -EINVAL; @@ -1475,6 +1555,15 @@ static ssize_t linereq_read(struct file *file, return bytes_read; } +static ssize_t linereq_read(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) +{ + struct linereq *lr = file->private_data; + + return call_read_locked(file, buf, count, f_ps, lr->gdev, + linereq_read_unlocked); +} + static void linereq_free(struct linereq *lr) { unsigned int i; @@ -1712,12 +1801,15 @@ struct lineevent_state { (GPIOEVENT_REQUEST_RISING_EDGE | \ GPIOEVENT_REQUEST_FALLING_EDGE) -static __poll_t lineevent_poll(struct file *file, - struct poll_table_struct *wait) +static __poll_t lineevent_poll_unlocked(struct file *file, + struct poll_table_struct *wait) { struct lineevent_state *le = file->private_data; __poll_t events = 0; + if (!le->gdev->chip) + return EPOLLHUP | EPOLLERR; + poll_wait(file, &le->wait, wait); if (!kfifo_is_empty_spinlocked_noirqsave(&le->events, &le->wait.lock)) @@ -1726,15 +1818,21 @@ static __poll_t lineevent_poll(struct file *file, return events; } +static __poll_t lineevent_poll(struct file *file, + struct poll_table_struct *wait) +{ + struct lineevent_state *le = file->private_data; + + return call_poll_locked(file, wait, le->gdev, lineevent_poll_unlocked); +} + struct compat_gpioeevent_data { compat_u64 timestamp; u32 id; }; -static ssize_t lineevent_read(struct file *file, - char __user *buf, - size_t count, - loff_t *f_ps) +static ssize_t lineevent_read_unlocked(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) { struct lineevent_state *le = file->private_data; struct gpioevent_data ge; @@ -1742,6 +1840,9 @@ static ssize_t lineevent_read(struct file *file, ssize_t ge_size; int ret; + if (!le->gdev->chip) + return -ENODEV; + /* * When compatible system call is being used the struct gpioevent_data, * in case of at least ia32, has different size due to the alignment @@ -1799,6 +1900,15 @@ static ssize_t lineevent_read(struct file *file, return bytes_read; } +static ssize_t lineevent_read(struct file *file, char __user *buf, + size_t count, loff_t *f_ps) +{ + struct lineevent_state *le = file->private_data; + + return call_read_locked(file, buf, count, f_ps, le->gdev, + lineevent_read_unlocked); +} + static void lineevent_free(struct lineevent_state *le) { if (le->irq) @@ -1816,13 +1926,16 @@ static int lineevent_release(struct inode *inode, struct file *file) return 0; } -static long lineevent_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) +static long lineevent_ioctl_unlocked(struct file *file, unsigned int cmd, + unsigned long arg) { struct lineevent_state *le = file->private_data; void __user *ip = (void __user *)arg; struct gpiohandle_data ghd; + if (!le->gdev->chip) + return -ENODEV; + /* * We can get the value for an event line but not set it, * because it is input by definition. @@ -1845,6 +1958,15 @@ static long lineevent_ioctl(struct file *file, unsigned int cmd, return -EINVAL; } +static long lineevent_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct lineevent_state *le = file->private_data; + + return call_ioctl_locked(file, cmd, arg, le->gdev, + lineevent_ioctl_unlocked); +} + #ifdef CONFIG_COMPAT static long lineevent_ioctl_compat(struct file *file, unsigned int cmd, unsigned long arg) @@ -2403,12 +2525,15 @@ static int lineinfo_changed_notify(struct notifier_block *nb, return NOTIFY_OK; } -static __poll_t lineinfo_watch_poll(struct file *file, - struct poll_table_struct *pollt) +static __poll_t lineinfo_watch_poll_unlocked(struct file *file, + struct poll_table_struct *pollt) { struct gpio_chardev_data *cdev = file->private_data; __poll_t events = 0; + if (!cdev->gdev->chip) + return EPOLLHUP | EPOLLERR; + poll_wait(file, &cdev->wait, pollt); if (!kfifo_is_empty_spinlocked_noirqsave(&cdev->events, @@ -2418,8 +2543,17 @@ static __poll_t lineinfo_watch_poll(struct file *file, return events; } -static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, - size_t count, loff_t *off) +static __poll_t lineinfo_watch_poll(struct file *file, + struct poll_table_struct *pollt) +{ + struct gpio_chardev_data *cdev = file->private_data; + + return call_poll_locked(file, pollt, cdev->gdev, + lineinfo_watch_poll_unlocked); +} + +static ssize_t lineinfo_watch_read_unlocked(struct file *file, char __user *buf, + size_t count, loff_t *off) { struct gpio_chardev_data *cdev = file->private_data; struct gpio_v2_line_info_changed event; @@ -2427,6 +2561,9 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, int ret; size_t event_size; + if (!cdev->gdev->chip) + return -ENODEV; + #ifndef CONFIG_GPIO_CDEV_V1 event_size = sizeof(struct gpio_v2_line_info_changed); if (count < event_size) @@ -2494,6 +2631,15 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, return bytes_read; } +static ssize_t lineinfo_watch_read(struct file *file, char __user *buf, + size_t count, loff_t *off) +{ + struct gpio_chardev_data *cdev = file->private_data; + + return call_read_locked(file, buf, count, off, cdev->gdev, + lineinfo_watch_read_unlocked); +} + /** * gpio_chrdev_open() - open the chardev for ioctl operations * @inode: inode for this chardev @@ -2507,13 +2653,17 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) struct gpio_chardev_data *cdev; int ret = -ENOMEM; + down_read(&gdev->sem); + /* Fail on open if the backing gpiochip is gone */ - if (!gdev->chip) - return -ENODEV; + if (!gdev->chip) { + ret = -ENODEV; + goto out_unlock; + } cdev = kzalloc(sizeof(*cdev), GFP_KERNEL); if (!cdev) - return -ENOMEM; + goto out_unlock; cdev->watched_lines = bitmap_zalloc(gdev->chip->ngpio, GFP_KERNEL); if (!cdev->watched_lines) @@ -2536,6 +2686,8 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file) if (ret) goto out_unregister_notifier; + up_read(&gdev->sem); + return ret; out_unregister_notifier: @@ -2545,6 +2697,8 @@ out_free_bitmap: bitmap_free(cdev->watched_lines); out_free_cdev: kfree(cdev); +out_unlock: + up_read(&gdev->sem); return ret; } diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c index 0e4e1291604d67da9fcfd3f9f1c4da86f963eb90..4fff7258ee41a6f511d303ff4e5f364ee7a71c80 100644 --- a/drivers/gpio/gpiolib-of.c +++ b/drivers/gpio/gpiolib-of.c @@ -85,7 +85,7 @@ static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data) { struct of_phandle_args *gpiospec = data; - return chip->gpiodev->dev.of_node == gpiospec->np && + return device_match_of_node(&chip->gpiodev->dev, gpiospec->np) && chip->of_xlate && chip->of_xlate(chip, gpiospec, NULL) >= 0; } @@ -112,55 +112,133 @@ static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip, return gpiochip_get_desc(chip, ret); } -/** - * of_gpio_need_valid_mask() - figure out if the OF GPIO driver needs - * to set the .valid_mask - * @gc: the target gpio_chip - * - * Return: true if the valid mask needs to be set +/* + * Overrides stated polarity of a gpio line and warns when there is a + * discrepancy. */ -bool of_gpio_need_valid_mask(const struct gpio_chip *gc) +static void of_gpio_quirk_polarity(const struct device_node *np, + bool active_high, + enum of_gpio_flags *flags) { - int size; - const struct device_node *np = gc->of_node; + if (active_high) { + if (*flags & OF_GPIO_ACTIVE_LOW) { + pr_warn("%s GPIO handle specifies active low - ignored\n", + of_node_full_name(np)); + *flags &= ~OF_GPIO_ACTIVE_LOW; + } + } else { + if (!(*flags & OF_GPIO_ACTIVE_LOW)) + pr_info("%s enforce active low on GPIO handle\n", + of_node_full_name(np)); + *flags |= OF_GPIO_ACTIVE_LOW; + } +} - size = of_property_count_u32_elems(np, "gpio-reserved-ranges"); - if (size > 0 && size % 2 == 0) - return true; - return false; +/* + * This quirk does static polarity overrides in cases where existing + * DTS specified incorrect polarity. + */ +static void of_gpio_try_fixup_polarity(const struct device_node *np, + const char *propname, + enum of_gpio_flags *flags) +{ + static const struct { + const char *compatible; + const char *propname; + bool active_high; + } gpios[] = { +#if !IS_ENABLED(CONFIG_LCD_HX8357) + /* + * Himax LCD controllers used incorrectly named + * "gpios-reset" property and also specified wrong + * polarity. + */ + { "himax,hx8357", "gpios-reset", false }, + { "himax,hx8369", "gpios-reset", false }, +#endif + }; + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + if (of_device_is_compatible(np, gpios[i].compatible) && + !strcmp(propname, gpios[i].propname)) { + of_gpio_quirk_polarity(np, gpios[i].active_high, flags); + break; + } + } } -static void of_gpio_flags_quirks(const struct device_node *np, - const char *propname, - enum of_gpio_flags *flags, - int index) +static void of_gpio_set_polarity_by_property(const struct device_node *np, + const char *propname, + enum of_gpio_flags *flags) { - /* - * Some GPIO fixed regulator quirks. - * Note that active low is the default. - */ - if (IS_ENABLED(CONFIG_REGULATOR) && - (of_device_is_compatible(np, "regulator-fixed") || - of_device_is_compatible(np, "reg-fixed-voltage") || - (!(strcmp(propname, "enable-gpio") && - strcmp(propname, "enable-gpios")) && - of_device_is_compatible(np, "regulator-gpio")))) { - bool active_low = !of_property_read_bool(np, - "enable-active-high"); + static const struct { + const char *compatible; + const char *gpio_propname; + const char *polarity_propname; + } gpios[] = { +#if IS_ENABLED(CONFIG_FEC) + /* Freescale Fast Ethernet Controller */ + { "fsl,imx25-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx27-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx28-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx6q-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,mvf600-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx6sx-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx6ul-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx8mq-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,imx8qm-fec", "phy-reset-gpios", "phy-reset-active-high" }, + { "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" }, +#endif +#if IS_ENABLED(CONFIG_PCI_IMX6) + { "fsl,imx6q-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx6sx-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx6qp-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx7d-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx8mq-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx8mm-pcie", "reset-gpio", "reset-gpio-active-high" }, + { "fsl,imx8mp-pcie", "reset-gpio", "reset-gpio-active-high" }, +#endif + /* * The regulator GPIO handles are specified such that the * presence or absence of "enable-active-high" solely controls * the polarity of the GPIO line. Any phandle flags must * be actively ignored. */ - if ((*flags & OF_GPIO_ACTIVE_LOW) && !active_low) { - pr_warn("%s GPIO handle specifies active low - ignored\n", - of_node_full_name(np)); - *flags &= ~OF_GPIO_ACTIVE_LOW; +#if IS_ENABLED(CONFIG_REGULATOR_FIXED_VOLTAGE) + { "regulator-fixed", "gpios", "enable-active-high" }, + { "regulator-fixed", "gpio", "enable-active-high" }, + { "reg-fixed-voltage", "gpios", "enable-active-high" }, + { "reg-fixed-voltage", "gpio", "enable-active-high" }, +#endif +#if IS_ENABLED(CONFIG_REGULATOR_GPIO) + { "regulator-gpio", "enable-gpio", "enable-active-high" }, + { "regulator-gpio", "enable-gpios", "enable-active-high" }, +#endif + }; + unsigned int i; + bool active_high; + + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + if (of_device_is_compatible(np, gpios[i].compatible) && + !strcmp(propname, gpios[i].gpio_propname)) { + active_high = of_property_read_bool(np, + gpios[i].polarity_propname); + of_gpio_quirk_polarity(np, active_high, flags); + break; } - if (active_low) - *flags |= OF_GPIO_ACTIVE_LOW; } +} + +static void of_gpio_flags_quirks(const struct device_node *np, + const char *propname, + enum of_gpio_flags *flags, + int index) +{ + of_gpio_try_fixup_polarity(np, propname, flags); + of_gpio_set_polarity_by_property(np, propname, flags); + /* * Legacy open drain handling for fixed voltage regulators. */ @@ -200,18 +278,10 @@ static void of_gpio_flags_quirks(const struct device_node *np, * conflict and the "spi-cs-high" flag will * take precedence. */ - if (of_property_read_bool(child, "spi-cs-high")) { - if (*flags & OF_GPIO_ACTIVE_LOW) { - pr_warn("%s GPIO handle specifies active low - ignored\n", - of_node_full_name(child)); - *flags &= ~OF_GPIO_ACTIVE_LOW; - } - } else { - if (!(*flags & OF_GPIO_ACTIVE_LOW)) - pr_info("%s enforce active low on chipselect handle\n", - of_node_full_name(child)); - *flags |= OF_GPIO_ACTIVE_LOW; - } + bool active_high = of_property_read_bool(child, + "spi-cs-high"); + of_gpio_quirk_polarity(child, active_high, + flags); of_node_put(child); break; } @@ -365,127 +435,164 @@ struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node, } EXPORT_SYMBOL_GPL(gpiod_get_from_of_node); -/* - * The SPI GPIO bindings happened before we managed to establish that GPIO - * properties should be named "foo-gpios" so we have this special kludge for - * them. - */ -static struct gpio_desc *of_find_spi_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) -{ - char prop_name[32]; /* 32 is max size of property name */ - - /* - * Hopefully the compiler stubs the rest of the function if this - * is false. - */ - if (!IS_ENABLED(CONFIG_SPI_MASTER)) - return ERR_PTR(-ENOENT); - - /* Allow this specifically for "spi-gpio" devices */ - if (!of_device_is_compatible(np, "spi-gpio") || !con_id) - return ERR_PTR(-ENOENT); - - /* Will be "gpio-sck", "gpio-mosi" or "gpio-miso" */ - snprintf(prop_name, sizeof(prop_name), "%s-%s", "gpio", con_id); - - return of_get_named_gpiod_flags(np, prop_name, idx, of_flags); -} - -/* - * The old Freescale bindings use simply "gpios" as name for the chip select - * lines rather than "cs-gpios" like all other SPI hardware. Account for this - * with a special quirk. - */ -static struct gpio_desc *of_find_spi_cs_gpio(struct device_node *np, +static struct gpio_desc *of_find_gpio_rename(struct device_node *np, const char *con_id, unsigned int idx, enum of_gpio_flags *of_flags) { - if (!IS_ENABLED(CONFIG_SPI_MASTER)) - return ERR_PTR(-ENOENT); + static const struct of_rename_gpio { + const char *con_id; + const char *legacy_id; /* NULL - same as con_id */ + /* + * Compatible string can be set to NULL in case where + * matching to a particular compatible is not practical, + * but it should only be done for gpio names that have + * vendor prefix to reduce risk of false positives. + * Addition of such entries is strongly discouraged. + */ + const char *compatible; + } gpios[] = { +#if !IS_ENABLED(CONFIG_LCD_HX8357) + /* Himax LCD controllers used "gpios-reset" */ + { "reset", "gpios-reset", "himax,hx8357" }, + { "reset", "gpios-reset", "himax,hx8369" }, +#endif +#if IS_ENABLED(CONFIG_MFD_ARIZONA) + { "wlf,reset", NULL, NULL }, +#endif +#if IS_ENABLED(CONFIG_RTC_DRV_MOXART) + { "rtc-data", "gpio-rtc-data", "moxa,moxart-rtc" }, + { "rtc-sclk", "gpio-rtc-sclk", "moxa,moxart-rtc" }, + { "rtc-reset", "gpio-rtc-reset", "moxa,moxart-rtc" }, +#endif +#if IS_ENABLED(CONFIG_NFC_MRVL_I2C) + { "reset", "reset-n-io", "marvell,nfc-i2c" }, +#endif +#if IS_ENABLED(CONFIG_NFC_MRVL_SPI) + { "reset", "reset-n-io", "marvell,nfc-spi" }, +#endif +#if IS_ENABLED(CONFIG_NFC_MRVL_UART) + { "reset", "reset-n-io", "marvell,nfc-uart" }, + { "reset", "reset-n-io", "mrvl,nfc-uart" }, +#endif +#if !IS_ENABLED(CONFIG_PCI_LANTIQ) + /* MIPS Lantiq PCI */ + { "reset", "gpios-reset", "lantiq,pci-xway" }, +#endif - /* Allow this specifically for Freescale and PPC devices */ - if (!of_device_is_compatible(np, "fsl,spi") && - !of_device_is_compatible(np, "aeroflexgaisler,spictrl") && - !of_device_is_compatible(np, "ibm,ppc4xx-spi")) - return ERR_PTR(-ENOENT); - /* Allow only if asking for "cs-gpios" */ - if (!con_id || strcmp(con_id, "cs")) - return ERR_PTR(-ENOENT); + /* + * Some regulator bindings happened before we managed to + * establish that GPIO properties should be named + * "foo-gpios" so we have this special kludge for them. + */ +#if IS_ENABLED(CONFIG_REGULATOR_ARIZONA_LDO1) + { "wlf,ldoena", NULL, NULL }, /* Arizona */ +#endif +#if IS_ENABLED(CONFIG_REGULATOR_WM8994) + { "wlf,ldo1ena", NULL, NULL }, /* WM8994 */ + { "wlf,ldo2ena", NULL, NULL }, /* WM8994 */ +#endif - /* - * While all other SPI controllers use "cs-gpios" the Freescale - * uses just "gpios" so translate to that when "cs-gpios" is - * requested. - */ - return of_get_named_gpiod_flags(np, "gpios", idx, of_flags); -} +#if IS_ENABLED(CONFIG_SND_SOC_CS42L56) + { "reset", "cirrus,gpio-nreset", "cirrus,cs42l56" }, +#endif +#if IS_ENABLED(CONFIG_SND_SOC_TLV320AIC3X) + { "reset", "gpio-reset", "ti,tlv320aic3x" }, + { "reset", "gpio-reset", "ti,tlv320aic33" }, + { "reset", "gpio-reset", "ti,tlv320aic3007" }, + { "reset", "gpio-reset", "ti,tlv320aic3104" }, + { "reset", "gpio-reset", "ti,tlv320aic3106" }, +#endif +#if IS_ENABLED(CONFIG_SPI_GPIO) + /* + * The SPI GPIO bindings happened before we managed to + * establish that GPIO properties should be named + * "foo-gpios" so we have this special kludge for them. + */ + { "miso", "gpio-miso", "spi-gpio" }, + { "mosi", "gpio-mosi", "spi-gpio" }, + { "sck", "gpio-sck", "spi-gpio" }, +#endif -/* - * Some regulator bindings happened before we managed to establish that GPIO - * properties should be named "foo-gpios" so we have this special kludge for - * them. - */ -static struct gpio_desc *of_find_regulator_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) -{ - /* These are the connection IDs we accept as legacy GPIO phandles */ - const char *whitelist[] = { - "wlf,ldoena", /* Arizona */ - "wlf,ldo1ena", /* WM8994 */ - "wlf,ldo2ena", /* WM8994 */ - }; - int i; + /* + * The old Freescale bindings use simply "gpios" as name + * for the chip select lines rather than "cs-gpios" like + * all other SPI hardware. Allow this specifically for + * Freescale and PPC devices. + */ +#if IS_ENABLED(CONFIG_SPI_FSL_SPI) + { "cs", "gpios", "fsl,spi" }, + { "cs", "gpios", "aeroflexgaisler,spictrl" }, +#endif +#if IS_ENABLED(CONFIG_SPI_PPC4xx) + { "cs", "gpios", "ibm,ppc4xx-spi" }, +#endif - if (!IS_ENABLED(CONFIG_REGULATOR)) - return ERR_PTR(-ENOENT); +#if IS_ENABLED(CONFIG_TYPEC_FUSB302) + /* + * Fairchild FUSB302 host is using undocumented "fcs,int_n" + * property without the compulsory "-gpios" suffix. + */ + { "fcs,int_n", NULL, "fcs,fusb302" }, +#endif + }; + struct gpio_desc *desc; + const char *legacy_id; + unsigned int i; if (!con_id) return ERR_PTR(-ENOENT); - i = match_string(whitelist, ARRAY_SIZE(whitelist), con_id); - if (i < 0) - return ERR_PTR(-ENOENT); + for (i = 0; i < ARRAY_SIZE(gpios); i++) { + if (strcmp(con_id, gpios[i].con_id)) + continue; + + if (gpios[i].compatible && + !of_device_is_compatible(np, gpios[i].compatible)) + continue; + + legacy_id = gpios[i].legacy_id ?: gpios[i].con_id; + desc = of_get_named_gpiod_flags(np, legacy_id, idx, of_flags); + if (!gpiod_not_found(desc)) { + pr_info("%s uses legacy gpio name '%s' instead of '%s-gpios'\n", + of_node_full_name(np), legacy_id, con_id); + return desc; + } + } - return of_get_named_gpiod_flags(np, con_id, idx, of_flags); + return ERR_PTR(-ENOENT); } -static struct gpio_desc *of_find_arizona_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) +static struct gpio_desc *of_find_mt2701_gpio(struct device_node *np, + const char *con_id, + unsigned int idx, + enum of_gpio_flags *of_flags) { - if (!IS_ENABLED(CONFIG_MFD_ARIZONA)) - return ERR_PTR(-ENOENT); + struct gpio_desc *desc; + const char *legacy_id; - if (!con_id || strcmp(con_id, "wlf,reset")) + if (!IS_ENABLED(CONFIG_SND_SOC_MT2701_CS42448)) return ERR_PTR(-ENOENT); - return of_get_named_gpiod_flags(np, con_id, idx, of_flags); -} + if (!of_device_is_compatible(np, "mediatek,mt2701-cs42448-machine")) + return ERR_PTR(-ENOENT); -static struct gpio_desc *of_find_usb_gpio(struct device_node *np, - const char *con_id, - unsigned int idx, - enum of_gpio_flags *of_flags) -{ - /* - * Currently this USB quirk is only for the Fairchild FUSB302 host - * which is using an undocumented DT GPIO line named "fcs,int_n" - * without the compulsory "-gpios" suffix. - */ - if (!IS_ENABLED(CONFIG_TYPEC_FUSB302)) + if (!con_id || strcmp(con_id, "i2s1-in-sel")) return ERR_PTR(-ENOENT); - if (!con_id || strcmp(con_id, "fcs,int_n")) + if (idx == 0) + legacy_id = "i2s1-in-sel-gpio1"; + else if (idx == 1) + legacy_id = "i2s1-in-sel-gpio2"; + else return ERR_PTR(-ENOENT); - return of_get_named_gpiod_flags(np, con_id, idx, of_flags); + desc = of_get_named_gpiod_flags(np, legacy_id, 0, of_flags); + if (!gpiod_not_found(desc)) + pr_info("%s is using legacy gpio name '%s' instead of '%s-gpios'\n", + of_node_full_name(np), legacy_id, con_id); + + return desc; } typedef struct gpio_desc *(*of_find_gpio_quirk)(struct device_node *np, @@ -493,15 +600,12 @@ typedef struct gpio_desc *(*of_find_gpio_quirk)(struct device_node *np, unsigned int idx, enum of_gpio_flags *of_flags); static const of_find_gpio_quirk of_find_gpio_quirks[] = { - of_find_spi_gpio, - of_find_spi_cs_gpio, - of_find_regulator_gpio, - of_find_arizona_gpio, - of_find_usb_gpio, + of_find_gpio_rename, + of_find_mt2701_gpio, NULL }; -struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, +struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *flags) { char prop_name[32]; /* 32 is max size of property name */ @@ -519,8 +623,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, snprintf(prop_name, sizeof(prop_name), "%s", gpio_suffixes[i]); - desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx, - &of_flags); + desc = of_get_named_gpiod_flags(np, prop_name, idx, &of_flags); if (!gpiod_not_found(desc)) break; @@ -528,7 +631,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id, /* Properly named GPIO was not found, try workarounds */ for (q = of_find_gpio_quirks; gpiod_not_found(desc) && *q; q++) - desc = (*q)(dev->of_node, con_id, idx, &of_flags); + desc = (*q)(np, con_id, idx, &of_flags); if (IS_ERR(desc)) return desc; @@ -831,8 +934,8 @@ int of_mm_gpiochip_add_data(struct device_node *np, if (mm_gc->save_regs) mm_gc->save_regs(mm_gc); - of_node_put(mm_gc->gc.of_node); - mm_gc->gc.of_node = of_node_get(np); + fwnode_handle_put(mm_gc->gc.fwnode); + mm_gc->gc.fwnode = fwnode_handle_get(of_fwnode_handle(np)); ret = gpiochip_add_data(gc, data); if (ret) @@ -858,37 +961,12 @@ void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc) { struct gpio_chip *gc = &mm_gc->gc; - if (!mm_gc) - return; - gpiochip_remove(gc); iounmap(mm_gc->regs); kfree(gc->label); } EXPORT_SYMBOL_GPL(of_mm_gpiochip_remove); -static void of_gpiochip_init_valid_mask(struct gpio_chip *chip) -{ - int len, i; - u32 start, count; - struct device_node *np = chip->of_node; - - len = of_property_count_u32_elems(np, "gpio-reserved-ranges"); - if (len < 0 || len % 2 != 0) - return; - - for (i = 0; i < len; i += 2) { - of_property_read_u32_index(np, "gpio-reserved-ranges", - i, &start); - of_property_read_u32_index(np, "gpio-reserved-ranges", - i + 1, &count); - if (start >= chip->ngpio || start + count > chip->ngpio) - continue; - - bitmap_clear(chip->valid_mask, start, count); - } -}; - #ifdef CONFIG_PINCTRL static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { @@ -982,9 +1060,11 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { return 0; } int of_gpiochip_add(struct gpio_chip *chip) { + struct device_node *np; int ret; - if (!chip->of_node) + np = to_of_node(dev_fwnode(&chip->gpiodev->dev)); + if (!np) return 0; if (!chip->of_xlate) { @@ -995,24 +1075,22 @@ int of_gpiochip_add(struct gpio_chip *chip) if (chip->of_gpio_n_cells > MAX_PHANDLE_ARGS) return -EINVAL; - of_gpiochip_init_valid_mask(chip); - ret = of_gpiochip_add_pin_range(chip); if (ret) return ret; - of_node_get(chip->of_node); + fwnode_handle_get(chip->fwnode); ret = of_gpiochip_scan_gpios(chip); if (ret) - of_node_put(chip->of_node); + fwnode_handle_put(chip->fwnode); return ret; } void of_gpiochip_remove(struct gpio_chip *chip) { - of_node_put(chip->of_node); + fwnode_handle_put(chip->fwnode); } void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev) diff --git a/drivers/gpio/gpiolib-of.h b/drivers/gpio/gpiolib-of.h index 1b5df39a952e0753c865eee823cc4944b6a223fe..a6c593e6766c59d03fc7060beeefbb9b36990763 100644 --- a/drivers/gpio/gpiolib-of.h +++ b/drivers/gpio/gpiolib-of.h @@ -16,17 +16,16 @@ struct gpio_desc; struct gpio_device; #ifdef CONFIG_OF_GPIO -struct gpio_desc *of_find_gpio(struct device *dev, +struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *lookupflags); int of_gpiochip_add(struct gpio_chip *gc); void of_gpiochip_remove(struct gpio_chip *gc); int of_gpio_get_count(struct device *dev, const char *con_id); -bool of_gpio_need_valid_mask(const struct gpio_chip *gc); void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev); #else -static inline struct gpio_desc *of_find_gpio(struct device *dev, +static inline struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id, unsigned int idx, unsigned long *lookupflags) @@ -39,10 +38,6 @@ static inline int of_gpio_get_count(struct device *dev, const char *con_id) { return 0; } -static inline bool of_gpio_need_valid_mask(const struct gpio_chip *gc) -{ - return false; -} static inline void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev) { diff --git a/drivers/gpio/gpiolib-swnode.c b/drivers/gpio/gpiolib-swnode.c new file mode 100644 index 0000000000000000000000000000000000000000..dd9ccac214d19ec581f609f1709e57769175b931 --- /dev/null +++ b/drivers/gpio/gpiolib-swnode.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Software Node helpers for the GPIO API + * + * Copyright 2022 Google LLC + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "gpiolib.h" +#include "gpiolib-swnode.h" + +static void swnode_format_propname(const char *con_id, char *propname, + size_t max_size) +{ + /* + * Note we do not need to try both -gpios and -gpio suffixes, + * as, unlike OF and ACPI, we can fix software nodes to conform + * to the proper binding. + */ + if (con_id) + snprintf(propname, max_size, "%s-gpios", con_id); + else + strscpy(propname, "gpios", max_size); +} + +static int swnode_gpiochip_match_name(struct gpio_chip *chip, void *data) +{ + return !strcmp(chip->label, data); +} + +static struct gpio_chip *swnode_get_chip(struct fwnode_handle *fwnode) +{ + const struct software_node *chip_node; + struct gpio_chip *chip; + + chip_node = to_software_node(fwnode); + if (!chip_node || !chip_node->name) + return ERR_PTR(-EINVAL); + + chip = gpiochip_find((void *)chip_node->name, swnode_gpiochip_match_name); + return chip ?: ERR_PTR(-EPROBE_DEFER); +} + +struct gpio_desc *swnode_find_gpio(struct fwnode_handle *fwnode, + const char *con_id, unsigned int idx, + unsigned long *flags) +{ + const struct software_node *swnode; + struct fwnode_reference_args args; + struct gpio_chip *chip; + struct gpio_desc *desc; + char propname[32]; /* 32 is max size of property name */ + int error; + + swnode = to_software_node(fwnode); + if (!swnode) + return ERR_PTR(-EINVAL); + + swnode_format_propname(con_id, propname, sizeof(propname)); + + /* + * We expect all swnode-described GPIOs have GPIO number and + * polarity arguments, hence nargs is set to 2. + */ + error = fwnode_property_get_reference_args(fwnode, propname, NULL, 2, idx, &args); + if (error) { + pr_debug("%s: can't parse '%s' property of node '%pfwP[%d]'\n", + __func__, propname, fwnode, idx); + return ERR_PTR(error); + } + + chip = swnode_get_chip(args.fwnode); + fwnode_handle_put(args.fwnode); + if (IS_ERR(chip)) + return ERR_CAST(chip); + + desc = gpiochip_get_desc(chip, args.args[0]); + *flags = args.args[1]; /* We expect native GPIO flags */ + + pr_debug("%s: parsed '%s' property of node '%pfwP[%d]' - status (%d)\n", + __func__, propname, fwnode, idx, PTR_ERR_OR_ZERO(desc)); + + return desc; +} + +/** + * swnode_gpio_count - count the GPIOs associated with a device / function + * @fwnode: firmware node of the GPIO consumer, can be %NULL for + * system-global GPIOs + * @con_id: function within the GPIO consumer + * + * Return: + * The number of GPIOs associated with a device / function or %-ENOENT, + * if no GPIO has been assigned to the requested function. + */ +int swnode_gpio_count(const struct fwnode_handle *fwnode, const char *con_id) +{ + struct fwnode_reference_args args; + char propname[32]; + int count; + + swnode_format_propname(con_id, propname, sizeof(propname)); + + /* + * This is not very efficient, but GPIO lists usually have only + * 1 or 2 entries. + */ + count = 0; + while (fwnode_property_get_reference_args(fwnode, propname, NULL, 0, + count, &args) == 0) { + fwnode_handle_put(args.fwnode); + count++; + } + + return count ?: -ENOENT; +} diff --git a/drivers/gpio/gpiolib-swnode.h b/drivers/gpio/gpiolib-swnode.h new file mode 100644 index 0000000000000000000000000000000000000000..af849e56f6bce99454328117fa8cadd4397396f3 --- /dev/null +++ b/drivers/gpio/gpiolib-swnode.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef GPIOLIB_SWNODE_H +#define GPIOLIB_SWNODE_H + +struct fwnode_handle; +struct gpio_desc; + +struct gpio_desc *swnode_find_gpio(struct fwnode_handle *fwnode, + const char *con_id, unsigned int idx, + unsigned long *flags); +int swnode_gpio_count(const struct fwnode_handle *fwnode, const char *con_id); + +#endif /* GPIOLIB_SWNODE_H */ diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index a70522aef3557e3e1b63aa6f68bab187ea969b6f..5a66d9616d7ccedf5e8a43235203760f29f942ee 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -26,6 +26,7 @@ #include "gpiolib.h" #include "gpiolib-of.h" #include "gpiolib-acpi.h" +#include "gpiolib-swnode.h" #include "gpiolib-cdev.h" #include "gpiolib-sysfs.h" @@ -183,14 +184,14 @@ EXPORT_SYMBOL_GPL(gpiod_to_chip); static int gpiochip_find_base(int ngpio) { struct gpio_device *gdev; - int base = ARCH_NR_GPIOS - ngpio; + int base = GPIO_DYNAMIC_BASE; - list_for_each_entry_reverse(gdev, &gpio_devices, list) { + list_for_each_entry(gdev, &gpio_devices, list) { /* found a free space? */ - if (gdev->base + gdev->ngpio <= base) + if (gdev->base >= base + ngpio) break; - /* nope, check the space right before the chip */ - base = gdev->base - ngpio; + /* nope, check the space right after the chip */ + base = gdev->base + gdev->ngpio; } if (gpio_is_valid(base)) { @@ -366,12 +367,12 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc) static int devprop_gpiochip_set_names(struct gpio_chip *chip) { struct gpio_device *gdev = chip->gpiodev; - struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev); + struct device *dev = &gdev->dev; const char **names; int ret, i; int count; - count = fwnode_property_string_array_count(fwnode, "gpio-line-names"); + count = device_property_string_array_count(dev, "gpio-line-names"); if (count < 0) return 0; @@ -384,7 +385,7 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip) * gpiochips. */ if (count <= chip->offset) { - dev_warn(&gdev->dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n", + dev_warn(dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n", count, chip->offset); return 0; } @@ -393,10 +394,10 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip) if (!names) return -ENOMEM; - ret = fwnode_property_read_string_array(fwnode, "gpio-line-names", + ret = device_property_read_string_array(dev, "gpio-line-names", names, count); if (ret < 0) { - dev_warn(&gdev->dev, "failed to read GPIO line names\n"); + dev_warn(dev, "failed to read GPIO line names\n"); kfree(names); return ret; } @@ -445,9 +446,22 @@ static unsigned long *gpiochip_allocate_mask(struct gpio_chip *gc) return p; } +static unsigned int gpiochip_count_reserved_ranges(struct gpio_chip *gc) +{ + struct device *dev = &gc->gpiodev->dev; + int size; + + /* Format is "start, count, ..." */ + size = device_property_count_u32(dev, "gpio-reserved-ranges"); + if (size > 0 && size % 2 == 0) + return size; + + return 0; +} + static int gpiochip_alloc_valid_mask(struct gpio_chip *gc) { - if (!(of_gpio_need_valid_mask(gc) || gc->init_valid_mask)) + if (!(gpiochip_count_reserved_ranges(gc) || gc->init_valid_mask)) return 0; gc->valid_mask = gpiochip_allocate_mask(gc); @@ -457,8 +471,50 @@ static int gpiochip_alloc_valid_mask(struct gpio_chip *gc) return 0; } +static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc) +{ + struct device *dev = &gc->gpiodev->dev; + unsigned int size; + u32 *ranges; + int ret; + + size = gpiochip_count_reserved_ranges(gc); + if (size == 0) + return 0; + + ranges = kmalloc_array(size, sizeof(*ranges), GFP_KERNEL); + if (!ranges) + return -ENOMEM; + + ret = device_property_read_u32_array(dev, "gpio-reserved-ranges", + ranges, size); + if (ret) { + kfree(ranges); + return ret; + } + + while (size) { + u32 count = ranges[--size]; + u32 start = ranges[--size]; + + if (start >= gc->ngpio || start + count > gc->ngpio) + continue; + + bitmap_clear(gc->valid_mask, start, count); + } + + kfree(ranges); + return 0; +} + static int gpiochip_init_valid_mask(struct gpio_chip *gc) { + int ret; + + ret = gpiochip_apply_reserved_ranges(gc); + if (ret) + return ret; + if (gc->init_valid_mask) return gc->init_valid_mask(gc, gc->valid_mask, @@ -493,7 +549,7 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid); static void gpiodevice_release(struct device *dev) { - struct gpio_device *gdev = container_of(dev, struct gpio_device, dev); + struct gpio_device *gdev = to_gpio_device(dev); unsigned long flags; spin_lock_irqsave(&gpio_lock, flags); @@ -627,7 +683,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * Assign fwnode depending on the result of the previous calls, * if none of them succeed, assign it to the parent's one. */ - gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode; + gc->fwnode = gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode; gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL); if (gdev->id < 0) { @@ -719,6 +775,9 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, * a poison instead. */ gc->base = base; + } else { + dev_warn(&gdev->dev, + "Static allocation of GPIO base is deprecated, use dynamic allocation.\n"); } gdev->base = base; @@ -735,6 +794,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data, spin_unlock_irqrestore(&gpio_lock, flags); BLOCKING_INIT_NOTIFIER_HEAD(&gdev->notifier); + init_rwsem(&gdev->sem); #ifdef CONFIG_PINCTRL INIT_LIST_HEAD(&gdev->pin_ranges); @@ -875,6 +935,8 @@ void gpiochip_remove(struct gpio_chip *gc) unsigned long flags; unsigned int i; + down_write(&gdev->sem); + /* FIXME: should the legacy sysfs handling be moved to gpio_device? */ gpiochip_sysfs_unregister(gdev); gpiochip_free_hogs(gc); @@ -909,6 +971,7 @@ void gpiochip_remove(struct gpio_chip *gc) * gone. */ gcdev_unregister(gdev); + up_write(&gdev->sem); put_device(&gdev->dev); } EXPORT_SYMBOL_GPL(gpiochip_remove); @@ -3808,62 +3871,88 @@ static int platform_gpio_count(struct device *dev, const char *con_id) return count; } -/** - * fwnode_get_named_gpiod - obtain a GPIO from firmware node - * @fwnode: handle of the firmware node - * @propname: name of the firmware property representing the GPIO - * @index: index of the GPIO to obtain for the consumer - * @dflags: GPIO initialization flags - * @label: label to attach to the requested GPIO - * - * This function can be used for drivers that get their configuration - * from opaque firmware. - * - * The function properly finds the corresponding GPIO using whatever is the - * underlying firmware interface and then makes sure that the GPIO - * descriptor is requested before it is returned to the caller. - * - * Returns: - * On successful request the GPIO pin is configured in accordance with - * provided @dflags. - * - * In case of error an ERR_PTR() is returned. - */ -static struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, - const char *propname, int index, - enum gpiod_flags dflags, - const char *label) +static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode, + struct device *consumer, + const char *con_id, + unsigned int idx, + enum gpiod_flags *flags, + unsigned long *lookupflags) { - unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT; - struct gpio_desc *desc = ERR_PTR(-ENODEV); - int ret; + struct gpio_desc *desc = ERR_PTR(-ENOENT); if (is_of_node(fwnode)) { - desc = gpiod_get_from_of_node(to_of_node(fwnode), - propname, index, - dflags, - label); - return desc; + dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n", + fwnode, con_id); + desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags); } else if (is_acpi_node(fwnode)) { - struct acpi_gpio_info info; + dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n", + fwnode, con_id); + desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags); + } else if (is_software_node(fwnode)) { + dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n", + fwnode, con_id); + desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags); + } - desc = acpi_node_get_gpiod(fwnode, propname, index, &info); - if (IS_ERR(desc)) - return desc; + return desc; +} - acpi_gpio_update_gpiod_flags(&dflags, &info); - acpi_gpio_update_gpiod_lookup_flags(&lflags, &info); - } else { - return ERR_PTR(-EINVAL); +static struct gpio_desc *gpiod_find_and_request(struct device *consumer, + struct fwnode_handle *fwnode, + const char *con_id, + unsigned int idx, + enum gpiod_flags flags, + const char *label, + bool platform_lookup_allowed) +{ + struct gpio_desc *desc = ERR_PTR(-ENOENT); + unsigned long lookupflags; + int ret; + + if (!IS_ERR_OR_NULL(fwnode)) + desc = gpiod_find_by_fwnode(fwnode, consumer, con_id, idx, + &flags, &lookupflags); + + if (gpiod_not_found(desc) && platform_lookup_allowed) { + /* + * Either we are not using DT or ACPI, or their lookup did not + * return a result. In that case, use platform lookup as a + * fallback. + */ + dev_dbg(consumer, "using lookup tables for GPIO lookup\n"); + desc = gpiod_find(consumer, con_id, idx, &lookupflags); } - /* Currently only ACPI takes this path */ + if (IS_ERR(desc)) { + dev_dbg(consumer, "No GPIO consumer %s found\n", con_id); + return desc; + } + + /* + * If a connection label was passed use that, else attempt to use + * the device name as label + */ ret = gpiod_request(desc, label); - if (ret) - return ERR_PTR(ret); + if (ret) { + if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) + return ERR_PTR(ret); + + /* + * This happens when there are several consumers for + * the same GPIO line: we just return here without + * further initialization. It is a bit of a hack. + * This is necessary to support fixed regulators. + * + * FIXME: Make this more sane and safe. + */ + dev_info(consumer, + "nonexclusive access to GPIO for %s\n", con_id); + return desc; + } - ret = gpiod_configure_flags(desc, propname, lflags, dflags); + ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); if (ret < 0) { + dev_dbg(consumer, "setup of GPIO %s failed\n", con_id); gpiod_put(desc); return ERR_PTR(ret); } @@ -3896,29 +3985,12 @@ static struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode, * In case of error an ERR_PTR() is returned. */ struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode, - const char *con_id, int index, + const char *con_id, + int index, enum gpiod_flags flags, const char *label) { - struct gpio_desc *desc; - char prop_name[32]; /* 32 is max size of property name */ - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) { - if (con_id) - snprintf(prop_name, sizeof(prop_name), "%s-%s", - con_id, gpio_suffixes[i]); - else - snprintf(prop_name, sizeof(prop_name), "%s", - gpio_suffixes[i]); - - desc = fwnode_get_named_gpiod(fwnode, prop_name, index, flags, - label); - if (!gpiod_not_found(desc)) - break; - } - - return desc; + return gpiod_find_and_request(NULL, fwnode, con_id, index, flags, label, false); } EXPORT_SYMBOL_GPL(fwnode_gpiod_get_index); @@ -3937,6 +4009,8 @@ int gpiod_count(struct device *dev, const char *con_id) count = of_gpio_get_count(dev, con_id); else if (is_acpi_node(fwnode)) count = acpi_gpio_count(dev, con_id); + else if (is_software_node(fwnode)) + count = swnode_gpio_count(fwnode, con_id); if (count < 0) count = platform_gpio_count(dev, con_id); @@ -4072,70 +4146,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev, unsigned int idx, enum gpiod_flags flags) { - unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT; - struct gpio_desc *desc = NULL; - int ret; - /* Maybe we have a device name, maybe not */ + struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; const char *devname = dev ? dev_name(dev) : "?"; - const struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL; - - dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id); - - /* Using device tree? */ - if (is_of_node(fwnode)) { - dev_dbg(dev, "using device tree for GPIO lookup\n"); - desc = of_find_gpio(dev, con_id, idx, &lookupflags); - } else if (is_acpi_node(fwnode)) { - dev_dbg(dev, "using ACPI for GPIO lookup\n"); - desc = acpi_find_gpio(dev, con_id, idx, &flags, &lookupflags); - } - - /* - * Either we are not using DT or ACPI, or their lookup did not return - * a result. In that case, use platform lookup as a fallback. - */ - if (!desc || gpiod_not_found(desc)) { - dev_dbg(dev, "using lookup tables for GPIO lookup\n"); - desc = gpiod_find(dev, con_id, idx, &lookupflags); - } - - if (IS_ERR(desc)) { - dev_dbg(dev, "No GPIO consumer %s found\n", con_id); - return desc; - } - - /* - * If a connection label was passed use that, else attempt to use - * the device name as label - */ - ret = gpiod_request(desc, con_id ?: devname); - if (ret) { - if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE)) - return ERR_PTR(ret); - - /* - * This happens when there are several consumers for - * the same GPIO line: we just return here without - * further initialization. It is a bit of a hack. - * This is necessary to support fixed regulators. - * - * FIXME: Make this more sane and safe. - */ - dev_info(dev, "nonexclusive access to GPIO for %s\n", con_id ?: devname); - return desc; - } - - ret = gpiod_configure_flags(desc, con_id, lookupflags, flags); - if (ret < 0) { - dev_dbg(dev, "setup of GPIO %s failed\n", con_id); - gpiod_put(desc); - return ERR_PTR(ret); - } + const char *label = con_id ?: devname; - blocking_notifier_call_chain(&desc->gdev->notifier, - GPIOLINE_CHANGED_REQUESTED, desc); - - return desc; + return gpiod_find_and_request(dev, fwnode, con_id, idx, flags, label, true); } EXPORT_SYMBOL_GPL(gpiod_get_index); diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h index d900ecdbac46dfd4258b1e536c1f0d2fe57a874e..b3c2db6eba80cbb05f1490b476b0e7f761fa070a 100644 --- a/drivers/gpio/gpiolib.h +++ b/drivers/gpio/gpiolib.h @@ -15,14 +15,15 @@ #include #include #include +#include #define GPIOCHIP_NAME "gpiochip" /** * struct gpio_device - internal state container for GPIO devices - * @id: numerical ID number for the GPIO chip * @dev: the GPIO device struct * @chrdev: character device for the GPIO device + * @id: numerical ID number for the GPIO chip * @mockdev: class device used by the deprecated sysfs interface (may be * NULL) * @owner: helps prevent removal of modules exporting active GPIOs @@ -39,6 +40,9 @@ * @list: links gpio_device:s together for traversal * @notifier: used to notify subscribers about lines being requested, released * or reconfigured + * @sem: protects the structure from a NULL-pointer dereference of @chip by + * user-space operations when the device gets unregistered during + * a hot-unplug event * @pin_ranges: range of pins served by the GPIO driver * * This state container holds most of the runtime variable data @@ -47,9 +51,9 @@ * userspace. */ struct gpio_device { - int id; struct device dev; struct cdev chrdev; + int id; struct device *mockdev; struct module *owner; struct gpio_chip *chip; @@ -60,6 +64,7 @@ struct gpio_device { void *data; struct list_head list; struct blocking_notifier_head notifier; + struct rw_semaphore sem; #ifdef CONFIG_PINCTRL /* @@ -72,6 +77,11 @@ struct gpio_device { #endif }; +static inline struct gpio_device *to_gpio_device(struct device *dev) +{ + return container_of(dev, struct gpio_device, dev); +} + /* gpio suffixes used for ACPI and device tree lookup */ static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" }; diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c index 4d4a715b429d1d4461b7a79debe829c940385655..2c2b92324a2e90fe13dcfa889986f00419c4be60 100644 --- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c +++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c @@ -60,8 +60,9 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector) return drm_panel_get_modes(fsl_connector->panel, connector); } -static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { if (mode->hdisplay & 0xf) return MODE_ERROR; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 077892a9aa8fdcb1588a81ebf4f47f89ef627df2..f5451adcd4890c08cb121f1e1d00d58ce64d010f 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -669,9 +669,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) vgpu->attached = true; - kvmgt_protect_table_init(vgpu); - gvt_cache_init(vgpu); - vgpu->track_node.track_write = kvmgt_page_track_write; vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_get_kvm(vgpu->vfio_device.kvm); @@ -715,6 +712,11 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev) kvmgt_protect_table_destroy(vgpu); gvt_cache_destroy(vgpu); + WARN_ON(vgpu->nr_cache_entries); + + vgpu->gfn_cache = RB_ROOT; + vgpu->dma_addr_cache = RB_ROOT; + intel_vgpu_release_msi_eventfd_ctx(vgpu); vgpu->attached = false; @@ -1445,9 +1447,17 @@ static int intel_vgpu_init_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); struct intel_vgpu_type *type = container_of(mdev->type, struct intel_vgpu_type, type); + int ret; vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt; - return intel_gvt_create_vgpu(vgpu, type->conf); + ret = intel_gvt_create_vgpu(vgpu, type->conf); + if (ret) + return ret; + + kvmgt_protect_table_init(vgpu); + gvt_cache_init(vgpu); + + return 0; } static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) @@ -1455,7 +1465,6 @@ static void intel_vgpu_release_dev(struct vfio_device *vfio_dev) struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); intel_gvt_destroy_vgpu(vgpu); - vfio_free_device(vfio_dev); } static const struct vfio_device_ops intel_vgpu_dev_ops = { @@ -1468,6 +1477,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = { .mmap = intel_vgpu_mmap, .ioctl = intel_vgpu_ioctl, .dma_unmap = intel_vgpu_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; static int intel_vgpu_probe(struct mdev_device *mdev) diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c index c822d0aafd2dc7d9d480e208a386c41458b4d5c9..e3f808372c47b87bf9a91eb21e3003b9d595b2d3 100644 --- a/drivers/gpu/drm/i915/i915_user_extensions.c +++ b/drivers/gpu/drm/i915/i915_user_extensions.c @@ -51,7 +51,7 @@ int i915_user_extensions(struct i915_user_extension __user *ext, return err; if (get_user(next, &ext->next_extension) || - overflows_type(next, ext)) + overflows_type(next, uintptr_t)) return -EFAULT; ext = u64_to_user_ptr(next); diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 6c14d13364bf789a30edcf2b162e4557aaf32a2f..67a66d4d5c70cd8652bf762eba8ec4dcd785a9d4 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -111,10 +111,6 @@ bool i915_error_injected(void); #define range_overflows_end_t(type, start, size, max) \ range_overflows_end((type)(start), (type)(size), (type)(max)) -/* Note we don't consider signbits :| */ -#define overflows_type(x, T) \ - (sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T)) - #define ptr_mask_bits(ptr, n) ({ \ unsigned long __v = (unsigned long)(ptr); \ (typeof(ptr))(__v & -BIT(n)); \ diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c index f3a5616b7daf9310b73758f99863479679b7f76b..577c477b5f467c77e139f43c5d82e3d386a1832f 100644 --- a/drivers/gpu/drm/sti/sti_dvo.c +++ b/drivers/gpu/drm/sti/sti_dvo.c @@ -346,8 +346,9 @@ static int sti_dvo_connector_get_modes(struct drm_connector *connector) #define CLK_TOLERANCE_HZ 50 -static int sti_dvo_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_dvo_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c index ec6656b9ee7cc82957a080b45f87344e03af04b0..15097ac6793146bf4be75dc21da4d8303ada30b9 100644 --- a/drivers/gpu/drm/sti/sti_hda.c +++ b/drivers/gpu/drm/sti/sti_hda.c @@ -601,8 +601,9 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector) #define CLK_TOLERANCE_HZ 50 -static int sti_hda_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_hda_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c index fcc2194869d68cd6978e74f7e09b01a152b9bc5c..8539fe1fedc4c6f91a8cd1196ec3a39af34fbd6e 100644 --- a/drivers/gpu/drm/sti/sti_hdmi.c +++ b/drivers/gpu/drm/sti/sti_hdmi.c @@ -1004,8 +1004,9 @@ fail: #define CLK_TOLERANCE_HZ 50 -static int sti_hdmi_connector_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) +static enum drm_mode_status +sti_hdmi_connector_mode_valid(struct drm_connector *connector, + struct drm_display_mode *mode) { int target = mode->clock * 1000; int target_min = target - CLK_TOLERANCE_HZ; diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c index ad489caf53ad80e60d2a9f2f64d47143816d139c..744a91e6e78c5d9d0a41a17b1687101463edaa62 100644 --- a/drivers/hid/hid-u2fzero.c +++ b/drivers/hid/hid-u2fzero.c @@ -261,7 +261,6 @@ static int u2fzero_init_hwrng(struct u2fzero_device *dev, dev->hwrng.name = dev->rng_name; dev->hwrng.read = u2fzero_rng_read; - dev->hwrng.quality = 1; return devm_hwrng_register(&dev->hdev->dev, &dev->hwrng); } diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig index e50f9603d189e8c0edbf1324398eb3cf2f636cb3..a7bfddf08fa7bab139ef1e87a0d74f2965a221fb 100644 --- a/drivers/i2c/busses/Kconfig +++ b/drivers/i2c/busses/Kconfig @@ -673,7 +673,7 @@ config I2C_HIGHLANDER config I2C_HISI tristate "HiSilicon I2C controller" - depends on (ARM64 && ACPI) || COMPILE_TEST + depends on ARM64 || COMPILE_TEST help Say Y here if you want to have Hisilicon I2C controller support available on the Kunpeng Server. diff --git a/drivers/i2c/busses/i2c-amd-mp2-pci.c b/drivers/i2c/busses/i2c-amd-mp2-pci.c index f57077a7448d1f7391104f4ea6ccfcd88561e417..14316530094968ab5a25ba03eecd0f132c2fc3ca 100644 --- a/drivers/i2c/busses/i2c-amd-mp2-pci.c +++ b/drivers/i2c/busses/i2c-amd-mp2-pci.c @@ -288,7 +288,7 @@ static void amd_mp2_clear_reg(struct amd_mp2_dev *privdata) static int amd_mp2_pci_init(struct amd_mp2_dev *privdata, struct pci_dev *pci_dev) { - int rc; + int irq_flag = 0, rc; pci_set_drvdata(pci_dev, privdata); @@ -311,17 +311,29 @@ static int amd_mp2_pci_init(struct amd_mp2_dev *privdata, if (rc) goto err_dma_mask; - /* Set up intx irq */ + /* request and enable interrupt */ writel(0, privdata->mmio + AMD_P2C_MSG_INTEN); - pci_intx(pci_dev, 1); - rc = devm_request_irq(&pci_dev->dev, pci_dev->irq, amd_mp2_irq_isr, - IRQF_SHARED, dev_name(&pci_dev->dev), privdata); - if (rc) - pci_err(pci_dev, "Failure requesting irq %i: %d\n", - pci_dev->irq, rc); + rc = pci_alloc_irq_vectors(pci_dev, 1, 1, PCI_IRQ_ALL_TYPES); + if (rc < 0) { + dev_err(&pci_dev->dev, "Failed to allocate single IRQ err=%d\n", rc); + goto err_dma_mask; + } + + privdata->dev_irq = pci_irq_vector(pci_dev, 0); + if (!pci_dev->msix_enabled && !pci_dev->msi_enabled) + irq_flag = IRQF_SHARED; + + rc = devm_request_irq(&pci_dev->dev, privdata->dev_irq, + amd_mp2_irq_isr, irq_flag, dev_name(&pci_dev->dev), privdata); + if (rc) { + pci_err(pci_dev, "Failure requesting irq %i: %d\n", privdata->dev_irq, rc); + goto free_irq_vectors; + } return rc; +free_irq_vectors: + free_irq(privdata->dev_irq, privdata); err_dma_mask: pci_clear_master(pci_dev); err_pci_enable: @@ -364,7 +376,7 @@ static void amd_mp2_pci_remove(struct pci_dev *pci_dev) pm_runtime_forbid(&pci_dev->dev); pm_runtime_get_noresume(&pci_dev->dev); - pci_intx(pci_dev, 0); + free_irq(privdata->dev_irq, privdata); pci_clear_master(pci_dev); amd_mp2_clear_reg(privdata); diff --git a/drivers/i2c/busses/i2c-amd-mp2.h b/drivers/i2c/busses/i2c-amd-mp2.h index ddecd0c8865608456b84a1a5fd8c3c57393075c3..018a42de8b1e3c53ea17ce0dcc38be98bb3b6b92 100644 --- a/drivers/i2c/busses/i2c-amd-mp2.h +++ b/drivers/i2c/busses/i2c-amd-mp2.h @@ -183,6 +183,7 @@ struct amd_mp2_dev { struct mutex c2p_lock; u8 c2p_lock_busid; unsigned int probed; + int dev_irq; }; /* PCIe communication driver */ diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c index c023b691441ea7ca6570e87c3b6095089f591071..a3240ece55b2b189433c7a0e09653a4ea5caece5 100644 --- a/drivers/i2c/busses/i2c-designware-common.c +++ b/drivers/i2c/busses/i2c-designware-common.c @@ -625,10 +625,5 @@ void i2c_dw_disable(struct dw_i2c_dev *dev) i2c_dw_release_lock(dev); } -void i2c_dw_disable_int(struct dw_i2c_dev *dev) -{ - regmap_write(dev->map, DW_IC_INTR_MASK, 0); -} - MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter core"); MODULE_LICENSE("GPL"); diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h index 4d3a3b464ecd8160b659f053e22f4c6bff80e4d9..95ebc5eaa5d12cf6008e5587996950a67bd20a5e 100644 --- a/drivers/i2c/busses/i2c-designware-core.h +++ b/drivers/i2c/busses/i2c-designware-core.h @@ -18,12 +18,12 @@ #include #include -#define DW_IC_DEFAULT_FUNCTIONALITY (I2C_FUNC_I2C | \ - I2C_FUNC_SMBUS_BYTE | \ - I2C_FUNC_SMBUS_BYTE_DATA | \ - I2C_FUNC_SMBUS_WORD_DATA | \ - I2C_FUNC_SMBUS_BLOCK_DATA | \ - I2C_FUNC_SMBUS_I2C_BLOCK) +#define DW_IC_DEFAULT_FUNCTIONALITY (I2C_FUNC_I2C | \ + I2C_FUNC_SMBUS_BYTE | \ + I2C_FUNC_SMBUS_BYTE_DATA | \ + I2C_FUNC_SMBUS_WORD_DATA | \ + I2C_FUNC_SMBUS_BLOCK_DATA | \ + I2C_FUNC_SMBUS_I2C_BLOCK) #define DW_IC_CON_MASTER BIT(0) #define DW_IC_CON_SPEED_STD (1 << 1) @@ -43,98 +43,98 @@ /* * Registers offset */ -#define DW_IC_CON 0x00 -#define DW_IC_TAR 0x04 -#define DW_IC_SAR 0x08 -#define DW_IC_DATA_CMD 0x10 -#define DW_IC_SS_SCL_HCNT 0x14 -#define DW_IC_SS_SCL_LCNT 0x18 -#define DW_IC_FS_SCL_HCNT 0x1c -#define DW_IC_FS_SCL_LCNT 0x20 -#define DW_IC_HS_SCL_HCNT 0x24 -#define DW_IC_HS_SCL_LCNT 0x28 -#define DW_IC_INTR_STAT 0x2c -#define DW_IC_INTR_MASK 0x30 -#define DW_IC_RAW_INTR_STAT 0x34 -#define DW_IC_RX_TL 0x38 -#define DW_IC_TX_TL 0x3c -#define DW_IC_CLR_INTR 0x40 -#define DW_IC_CLR_RX_UNDER 0x44 -#define DW_IC_CLR_RX_OVER 0x48 -#define DW_IC_CLR_TX_OVER 0x4c -#define DW_IC_CLR_RD_REQ 0x50 -#define DW_IC_CLR_TX_ABRT 0x54 -#define DW_IC_CLR_RX_DONE 0x58 -#define DW_IC_CLR_ACTIVITY 0x5c -#define DW_IC_CLR_STOP_DET 0x60 -#define DW_IC_CLR_START_DET 0x64 -#define DW_IC_CLR_GEN_CALL 0x68 -#define DW_IC_ENABLE 0x6c -#define DW_IC_STATUS 0x70 -#define DW_IC_TXFLR 0x74 -#define DW_IC_RXFLR 0x78 -#define DW_IC_SDA_HOLD 0x7c -#define DW_IC_TX_ABRT_SOURCE 0x80 -#define DW_IC_ENABLE_STATUS 0x9c -#define DW_IC_CLR_RESTART_DET 0xa8 -#define DW_IC_COMP_PARAM_1 0xf4 -#define DW_IC_COMP_VERSION 0xf8 -#define DW_IC_SDA_HOLD_MIN_VERS 0x3131312A -#define DW_IC_COMP_TYPE 0xfc -#define DW_IC_COMP_TYPE_VALUE 0x44570140 - -#define DW_IC_INTR_RX_UNDER BIT(0) -#define DW_IC_INTR_RX_OVER BIT(1) -#define DW_IC_INTR_RX_FULL BIT(2) -#define DW_IC_INTR_TX_OVER BIT(3) -#define DW_IC_INTR_TX_EMPTY BIT(4) -#define DW_IC_INTR_RD_REQ BIT(5) -#define DW_IC_INTR_TX_ABRT BIT(6) -#define DW_IC_INTR_RX_DONE BIT(7) -#define DW_IC_INTR_ACTIVITY BIT(8) -#define DW_IC_INTR_STOP_DET BIT(9) -#define DW_IC_INTR_START_DET BIT(10) -#define DW_IC_INTR_GEN_CALL BIT(11) -#define DW_IC_INTR_RESTART_DET BIT(12) - -#define DW_IC_INTR_DEFAULT_MASK (DW_IC_INTR_RX_FULL | \ - DW_IC_INTR_TX_ABRT | \ - DW_IC_INTR_STOP_DET) -#define DW_IC_INTR_MASTER_MASK (DW_IC_INTR_DEFAULT_MASK | \ - DW_IC_INTR_TX_EMPTY) -#define DW_IC_INTR_SLAVE_MASK (DW_IC_INTR_DEFAULT_MASK | \ - DW_IC_INTR_RX_DONE | \ - DW_IC_INTR_RX_UNDER | \ - DW_IC_INTR_RD_REQ) - -#define DW_IC_STATUS_ACTIVITY BIT(0) -#define DW_IC_STATUS_TFE BIT(2) -#define DW_IC_STATUS_MASTER_ACTIVITY BIT(5) -#define DW_IC_STATUS_SLAVE_ACTIVITY BIT(6) - -#define DW_IC_SDA_HOLD_RX_SHIFT 16 -#define DW_IC_SDA_HOLD_RX_MASK GENMASK(23, 16) - -#define DW_IC_ERR_TX_ABRT 0x1 - -#define DW_IC_TAR_10BITADDR_MASTER BIT(12) +#define DW_IC_CON 0x00 +#define DW_IC_TAR 0x04 +#define DW_IC_SAR 0x08 +#define DW_IC_DATA_CMD 0x10 +#define DW_IC_SS_SCL_HCNT 0x14 +#define DW_IC_SS_SCL_LCNT 0x18 +#define DW_IC_FS_SCL_HCNT 0x1c +#define DW_IC_FS_SCL_LCNT 0x20 +#define DW_IC_HS_SCL_HCNT 0x24 +#define DW_IC_HS_SCL_LCNT 0x28 +#define DW_IC_INTR_STAT 0x2c +#define DW_IC_INTR_MASK 0x30 +#define DW_IC_RAW_INTR_STAT 0x34 +#define DW_IC_RX_TL 0x38 +#define DW_IC_TX_TL 0x3c +#define DW_IC_CLR_INTR 0x40 +#define DW_IC_CLR_RX_UNDER 0x44 +#define DW_IC_CLR_RX_OVER 0x48 +#define DW_IC_CLR_TX_OVER 0x4c +#define DW_IC_CLR_RD_REQ 0x50 +#define DW_IC_CLR_TX_ABRT 0x54 +#define DW_IC_CLR_RX_DONE 0x58 +#define DW_IC_CLR_ACTIVITY 0x5c +#define DW_IC_CLR_STOP_DET 0x60 +#define DW_IC_CLR_START_DET 0x64 +#define DW_IC_CLR_GEN_CALL 0x68 +#define DW_IC_ENABLE 0x6c +#define DW_IC_STATUS 0x70 +#define DW_IC_TXFLR 0x74 +#define DW_IC_RXFLR 0x78 +#define DW_IC_SDA_HOLD 0x7c +#define DW_IC_TX_ABRT_SOURCE 0x80 +#define DW_IC_ENABLE_STATUS 0x9c +#define DW_IC_CLR_RESTART_DET 0xa8 +#define DW_IC_COMP_PARAM_1 0xf4 +#define DW_IC_COMP_VERSION 0xf8 +#define DW_IC_SDA_HOLD_MIN_VERS 0x3131312A /* "111*" == v1.11* */ +#define DW_IC_COMP_TYPE 0xfc +#define DW_IC_COMP_TYPE_VALUE 0x44570140 /* "DW" + 0x0140 */ + +#define DW_IC_INTR_RX_UNDER BIT(0) +#define DW_IC_INTR_RX_OVER BIT(1) +#define DW_IC_INTR_RX_FULL BIT(2) +#define DW_IC_INTR_TX_OVER BIT(3) +#define DW_IC_INTR_TX_EMPTY BIT(4) +#define DW_IC_INTR_RD_REQ BIT(5) +#define DW_IC_INTR_TX_ABRT BIT(6) +#define DW_IC_INTR_RX_DONE BIT(7) +#define DW_IC_INTR_ACTIVITY BIT(8) +#define DW_IC_INTR_STOP_DET BIT(9) +#define DW_IC_INTR_START_DET BIT(10) +#define DW_IC_INTR_GEN_CALL BIT(11) +#define DW_IC_INTR_RESTART_DET BIT(12) + +#define DW_IC_INTR_DEFAULT_MASK (DW_IC_INTR_RX_FULL | \ + DW_IC_INTR_TX_ABRT | \ + DW_IC_INTR_STOP_DET) +#define DW_IC_INTR_MASTER_MASK (DW_IC_INTR_DEFAULT_MASK | \ + DW_IC_INTR_TX_EMPTY) +#define DW_IC_INTR_SLAVE_MASK (DW_IC_INTR_DEFAULT_MASK | \ + DW_IC_INTR_RX_UNDER | \ + DW_IC_INTR_RD_REQ) + +#define DW_IC_STATUS_ACTIVITY BIT(0) +#define DW_IC_STATUS_TFE BIT(2) +#define DW_IC_STATUS_RFNE BIT(3) +#define DW_IC_STATUS_MASTER_ACTIVITY BIT(5) +#define DW_IC_STATUS_SLAVE_ACTIVITY BIT(6) + +#define DW_IC_SDA_HOLD_RX_SHIFT 16 +#define DW_IC_SDA_HOLD_RX_MASK GENMASK(23, 16) + +#define DW_IC_ERR_TX_ABRT 0x1 + +#define DW_IC_TAR_10BITADDR_MASTER BIT(12) #define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH (BIT(2) | BIT(3)) #define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK GENMASK(3, 2) /* - * status codes + * Sofware status flags */ -#define STATUS_IDLE 0x0 -#define STATUS_ACTIVE 0x1 -#define STATUS_WRITE_IN_PROGRESS 0x2 -#define STATUS_READ_IN_PROGRESS 0x4 +#define STATUS_ACTIVE BIT(0) +#define STATUS_WRITE_IN_PROGRESS BIT(1) +#define STATUS_READ_IN_PROGRESS BIT(2) +#define STATUS_MASK GENMASK(2, 0) /* * operation modes */ -#define DW_IC_MASTER 0 -#define DW_IC_SLAVE 1 +#define DW_IC_MASTER 0 +#define DW_IC_SLAVE 1 /* * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register @@ -142,20 +142,20 @@ * Only expected abort codes are listed here * refer to the datasheet for the full list */ -#define ABRT_7B_ADDR_NOACK 0 -#define ABRT_10ADDR1_NOACK 1 -#define ABRT_10ADDR2_NOACK 2 -#define ABRT_TXDATA_NOACK 3 -#define ABRT_GCALL_NOACK 4 -#define ABRT_GCALL_READ 5 -#define ABRT_SBYTE_ACKDET 7 -#define ABRT_SBYTE_NORSTRT 9 -#define ABRT_10B_RD_NORSTRT 10 -#define ABRT_MASTER_DIS 11 -#define ARB_LOST 12 -#define ABRT_SLAVE_FLUSH_TXFIFO 13 -#define ABRT_SLAVE_ARBLOST 14 -#define ABRT_SLAVE_RD_INTX 15 +#define ABRT_7B_ADDR_NOACK 0 +#define ABRT_10ADDR1_NOACK 1 +#define ABRT_10ADDR2_NOACK 2 +#define ABRT_TXDATA_NOACK 3 +#define ABRT_GCALL_NOACK 4 +#define ABRT_GCALL_READ 5 +#define ABRT_SBYTE_ACKDET 7 +#define ABRT_SBYTE_NORSTRT 9 +#define ABRT_10B_RD_NORSTRT 10 +#define ABRT_MASTER_DIS 11 +#define ARB_LOST 12 +#define ABRT_SLAVE_FLUSH_TXFIFO 13 +#define ABRT_SLAVE_ARBLOST 14 +#define ABRT_SLAVE_RD_INTX 15 #define DW_IC_TX_ABRT_7B_ADDR_NOACK BIT(ABRT_7B_ADDR_NOACK) #define DW_IC_TX_ABRT_10ADDR1_NOACK BIT(ABRT_10ADDR1_NOACK) @@ -172,11 +172,11 @@ #define DW_IC_RX_ABRT_SLAVE_ARBLOST BIT(ABRT_SLAVE_ARBLOST) #define DW_IC_RX_ABRT_SLAVE_FLUSH_TXFIFO BIT(ABRT_SLAVE_FLUSH_TXFIFO) -#define DW_IC_TX_ABRT_NOACK (DW_IC_TX_ABRT_7B_ADDR_NOACK | \ - DW_IC_TX_ABRT_10ADDR1_NOACK | \ - DW_IC_TX_ABRT_10ADDR2_NOACK | \ - DW_IC_TX_ABRT_TXDATA_NOACK | \ - DW_IC_TX_ABRT_GCALL_NOACK) +#define DW_IC_TX_ABRT_NOACK (DW_IC_TX_ABRT_7B_ADDR_NOACK | \ + DW_IC_TX_ABRT_10ADDR1_NOACK | \ + DW_IC_TX_ABRT_10ADDR2_NOACK | \ + DW_IC_TX_ABRT_TXDATA_NOACK | \ + DW_IC_TX_ABRT_GCALL_NOACK) struct clk; struct device; @@ -232,7 +232,6 @@ struct reset_control; * -1 if there is no semaphore. * @shared_with_punit: true if this bus is shared with the SoCs PUNIT * @disable: function to disable the controller - * @disable_int: function to disable all interrupts * @init: function to initialize the I2C hardware * @set_sda_hold_time: callback to retrieve IP specific SDA hold timing * @mode: operation mode - DW_IC_MASTER or DW_IC_SLAVE @@ -290,28 +289,27 @@ struct dw_i2c_dev { int semaphore_idx; bool shared_with_punit; void (*disable)(struct dw_i2c_dev *dev); - void (*disable_int)(struct dw_i2c_dev *dev); int (*init)(struct dw_i2c_dev *dev); int (*set_sda_hold_time)(struct dw_i2c_dev *dev); int mode; struct i2c_bus_recovery_info rinfo; }; -#define ACCESS_INTR_MASK BIT(0) -#define ACCESS_NO_IRQ_SUSPEND BIT(1) -#define ARBITRATION_SEMAPHORE BIT(2) +#define ACCESS_INTR_MASK BIT(0) +#define ACCESS_NO_IRQ_SUSPEND BIT(1) +#define ARBITRATION_SEMAPHORE BIT(2) -#define MODEL_MSCC_OCELOT BIT(8) -#define MODEL_BAIKAL_BT1 BIT(9) -#define MODEL_AMD_NAVI_GPU BIT(10) -#define MODEL_MASK GENMASK(11, 8) +#define MODEL_MSCC_OCELOT BIT(8) +#define MODEL_BAIKAL_BT1 BIT(9) +#define MODEL_AMD_NAVI_GPU BIT(10) +#define MODEL_MASK GENMASK(11, 8) /* * Enable UCSI interrupt by writing 0xd at register * offset 0x474 specified in hardware specification. */ -#define AMD_UCSI_INTR_REG 0x474 -#define AMD_UCSI_INTR_EN 0xd +#define AMD_UCSI_INTR_REG 0x474 +#define AMD_UCSI_INTR_EN 0xd struct i2c_dw_semaphore_callbacks { int (*probe)(struct dw_i2c_dev *dev); @@ -331,7 +329,6 @@ int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev); int i2c_dw_set_fifo_size(struct dw_i2c_dev *dev); u32 i2c_dw_func(struct i2c_adapter *adap); void i2c_dw_disable(struct dw_i2c_dev *dev); -void i2c_dw_disable_int(struct dw_i2c_dev *dev); static inline void __i2c_dw_enable(struct dw_i2c_dev *dev) { diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index dc3c5a15a95b9574e168932a9fa398779acdc1e0..45f569155bfe1477f681808e0e741646c5d94690 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -239,7 +239,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) msgs[dev->msg_write_idx].addr | ic_tar); /* Enforce disabled interrupts (due to HW issues) */ - i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); /* Enable the adapter */ __i2c_dw_enable(dev); @@ -299,7 +299,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs, dev->msgs = msgs; dev->msgs_num = num_msgs; i2c_dw_xfer_init(dev); - i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); /* Initiate messages read/write transaction */ for (msg_wrt_idx = 0; msg_wrt_idx < num_msgs; msg_wrt_idx++) { @@ -574,7 +574,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num) dev->msg_write_idx = 0; dev->msg_read_idx = 0; dev->msg_err = 0; - dev->status = STATUS_IDLE; + dev->status = 0; dev->abort_source = 0; dev->rx_outstanding = 0; @@ -711,9 +711,18 @@ static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev) * Interrupt service routine. This gets called whenever an I2C master interrupt * occurs. */ -static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev) +static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) { - u32 stat; + struct dw_i2c_dev *dev = dev_id; + u32 stat, enabled; + + regmap_read(dev->map, DW_IC_ENABLE, &enabled); + regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat); + if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY)) + return IRQ_NONE; + if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0)) + return IRQ_NONE; + dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat); stat = i2c_dw_read_clear_intrbits(dev); @@ -726,12 +735,12 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev) * the HW active). */ regmap_write(dev->map, DW_IC_INTR_MASK, 0); - return 0; + return IRQ_HANDLED; } if (stat & DW_IC_INTR_TX_ABRT) { dev->cmd_err |= DW_IC_ERR_TX_ABRT; - dev->status = STATUS_IDLE; + dev->status &= ~STATUS_MASK; dev->rx_outstanding = 0; /* @@ -761,26 +770,10 @@ tx_aborted: else if (unlikely(dev->flags & ACCESS_INTR_MASK)) { /* Workaround to trigger pending interrupt */ regmap_read(dev->map, DW_IC_INTR_MASK, &stat); - i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); regmap_write(dev->map, DW_IC_INTR_MASK, stat); } - return 0; -} - -static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id) -{ - struct dw_i2c_dev *dev = dev_id; - u32 stat, enabled; - - regmap_read(dev->map, DW_IC_ENABLE, &enabled); - regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat); - dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat); - if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY)) - return IRQ_NONE; - - i2c_dw_irq_handler_master(dev); - return IRQ_HANDLED; } @@ -878,7 +871,6 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev) dev->init = i2c_dw_init_master; dev->disable = i2c_dw_disable; - dev->disable_int = i2c_dw_disable_int; ret = i2c_dw_init_regmap(dev); if (ret) @@ -917,7 +909,7 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev) if (ret) return ret; - i2c_dw_disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); i2c_dw_release_lock(dev); ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags, diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c index 0d15f4c1e9f7e3ae17a03f7b39465b47f8cf5abe..c6d2e4c2ac23c9021171ec4f2c3ca6036f6e6c45 100644 --- a/drivers/i2c/busses/i2c-designware-slave.c +++ b/drivers/i2c/busses/i2c-designware-slave.c @@ -78,13 +78,7 @@ static int i2c_dw_reg_slave(struct i2c_client *slave) __i2c_dw_enable(dev); - dev->cmd_err = 0; - dev->msg_write_idx = 0; - dev->msg_read_idx = 0; - dev->msg_err = 0; - dev->status = STATUS_IDLE; - dev->abort_source = 0; - dev->rx_outstanding = 0; + dev->status = 0; return 0; } @@ -93,7 +87,7 @@ static int i2c_dw_unreg_slave(struct i2c_client *slave) { struct dw_i2c_dev *dev = i2c_get_adapdata(slave->adapter); - dev->disable_int(dev); + regmap_write(dev->map, DW_IC_INTR_MASK, 0); dev->disable(dev); synchronize_irq(dev->irq); dev->slave = NULL; @@ -153,9 +147,9 @@ static u32 i2c_dw_read_clear_intrbits_slave(struct dw_i2c_dev *dev) * Interrupt service routine. This gets called whenever an I2C slave interrupt * occurs. */ - -static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) +static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) { + struct dw_i2c_dev *dev = dev_id; u32 raw_stat, stat, enabled, tmp; u8 val = 0, slave_activity; @@ -165,7 +159,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) slave_activity = ((tmp & DW_IC_STATUS_SLAVE_ACTIVITY) >> 6); if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave) - return 0; + return IRQ_NONE; stat = i2c_dw_read_clear_intrbits_slave(dev); dev_dbg(dev->dev, @@ -173,55 +167,45 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev) enabled, slave_activity, raw_stat, stat); if (stat & DW_IC_INTR_RX_FULL) { - if (dev->status != STATUS_WRITE_IN_PROGRESS) { - dev->status = STATUS_WRITE_IN_PROGRESS; + if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) { + dev->status |= STATUS_WRITE_IN_PROGRESS; + dev->status &= ~STATUS_READ_IN_PROGRESS; i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED, &val); } - regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); - val = tmp; - if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, - &val)) - dev_vdbg(dev->dev, "Byte %X acked!", val); + do { + regmap_read(dev->map, DW_IC_DATA_CMD, &tmp); + val = tmp; + i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED, + &val); + regmap_read(dev->map, DW_IC_STATUS, &tmp); + } while (tmp & DW_IC_STATUS_RFNE); } if (stat & DW_IC_INTR_RD_REQ) { if (slave_activity) { regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp); - dev->status = STATUS_READ_IN_PROGRESS; - if (!i2c_slave_event(dev->slave, - I2C_SLAVE_READ_REQUESTED, - &val)) - regmap_write(dev->map, DW_IC_DATA_CMD, val); + if (!(dev->status & STATUS_READ_IN_PROGRESS)) { + i2c_slave_event(dev->slave, + I2C_SLAVE_READ_REQUESTED, + &val); + dev->status |= STATUS_READ_IN_PROGRESS; + dev->status &= ~STATUS_WRITE_IN_PROGRESS; + } else { + i2c_slave_event(dev->slave, + I2C_SLAVE_READ_PROCESSED, + &val); + } + regmap_write(dev->map, DW_IC_DATA_CMD, val); } } - if (stat & DW_IC_INTR_RX_DONE) { - if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_PROCESSED, - &val)) - regmap_read(dev->map, DW_IC_CLR_RX_DONE, &tmp); - } - - if (stat & DW_IC_INTR_STOP_DET) { - dev->status = STATUS_IDLE; + if (stat & DW_IC_INTR_STOP_DET) i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val); - } - return 1; -} - -static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id) -{ - struct dw_i2c_dev *dev = dev_id; - int ret; - - ret = i2c_dw_irq_handler_slave(dev); - if (ret > 0) - complete(&dev->cmd_complete); - - return IRQ_RETVAL(ret); + return IRQ_HANDLED; } static const struct i2c_algorithm i2c_dw_algo = { @@ -246,11 +230,8 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev) struct i2c_adapter *adap = &dev->adapter; int ret; - init_completion(&dev->cmd_complete); - dev->init = i2c_dw_init_slave; dev->disable = i2c_dw_disable; - dev->disable_int = i2c_dw_disable_int; ret = i2c_dw_init_regmap(dev); if (ret) diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c index b1985c1667e163bcb810c9c0786620bd3ebf4d04..0e4385a9bcf71c902345163298cf6f4734f3d5e6 100644 --- a/drivers/i2c/busses/i2c-gpio.c +++ b/drivers/i2c/busses/i2c-gpio.c @@ -482,19 +482,17 @@ static int i2c_gpio_remove(struct platform_device *pdev) return 0; } -#if defined(CONFIG_OF) static const struct of_device_id i2c_gpio_dt_ids[] = { { .compatible = "i2c-gpio", }, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, i2c_gpio_dt_ids); -#endif static struct platform_driver i2c_gpio_driver = { .driver = { .name = "i2c-gpio", - .of_match_table = of_match_ptr(i2c_gpio_dt_ids), + .of_match_table = i2c_gpio_dt_ids, }, .probe = i2c_gpio_probe, .remove = i2c_gpio_remove, diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c index 76c3d8f6fc3c6e4ed24b9bc4619905bfab08b41d..8c6c7075c765c502e194d222934b294ba966de89 100644 --- a/drivers/i2c/busses/i2c-hisi.c +++ b/drivers/i2c/busses/i2c-hisi.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -88,6 +89,7 @@ struct hisi_i2c_controller { struct i2c_adapter adapter; void __iomem *iobase; struct device *dev; + struct clk *clk; int irq; /* Intermediates for recording the transfer process */ @@ -454,10 +456,15 @@ static int hisi_i2c_probe(struct platform_device *pdev) return ret; } - ret = device_property_read_u64(dev, "clk_rate", &clk_rate_hz); - if (ret) { - dev_err(dev, "failed to get clock frequency, ret = %d\n", ret); - return ret; + ctlr->clk = devm_clk_get_optional_enabled(&pdev->dev, NULL); + if (IS_ERR_OR_NULL(ctlr->clk)) { + ret = device_property_read_u64(dev, "clk_rate", &clk_rate_hz); + if (ret) { + dev_err(dev, "failed to get clock frequency, ret = %d\n", ret); + return ret; + } + } else { + clk_rate_hz = clk_get_rate(ctlr->clk); } ctlr->clk_rate_khz = DIV_ROUND_UP_ULL(clk_rate_hz, HZ_PER_KHZ); @@ -489,11 +496,18 @@ static const struct acpi_device_id hisi_i2c_acpi_ids[] = { }; MODULE_DEVICE_TABLE(acpi, hisi_i2c_acpi_ids); +static const struct of_device_id hisi_i2c_dts_ids[] = { + { .compatible = "hisilicon,ascend910-i2c", }, + { } +}; +MODULE_DEVICE_TABLE(of, hisi_i2c_dts_ids); + static struct platform_driver hisi_i2c_driver = { .probe = hisi_i2c_probe, .driver = { .name = "hisi-i2c", .acpi_match_table = hisi_i2c_acpi_ids, + .of_match_table = hisi_i2c_dts_ids, }, }; module_platform_driver(hisi_i2c_driver); diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c index fc70920c4ddabab247eae7de1434d48e3d83d593..cf5bacf3a4884430047d71c181896d63a17c4f4f 100644 --- a/drivers/i2c/busses/i2c-imx.c +++ b/drivers/i2c/busses/i2c-imx.c @@ -1451,8 +1451,7 @@ static int i2c_imx_probe(struct platform_device *pdev) if (irq < 0) return irq; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); + base = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(base)) return PTR_ERR(base); diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c index fe2349590f75e2c0bb87f70420ba08558644519b..c74985d77b0ecf34726099b3eded94a2f326d072 100644 --- a/drivers/i2c/busses/i2c-ismt.c +++ b/drivers/i2c/busses/i2c-ismt.c @@ -509,6 +509,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr, if (read_write == I2C_SMBUS_WRITE) { /* Block Write */ dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA: WRITE\n"); + if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX) + return -EINVAL; + dma_size = data->block[0] + 1; dma_direction = DMA_TO_DEVICE; desc->wr_len_cmd = dma_size; diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c index fc7bfd98156ba730ff781aea280f87e4e07962e7..d80e59340d97a1e6f7790880b4f937e05cf5650e 100644 --- a/drivers/i2c/busses/i2c-mt65xx.c +++ b/drivers/i2c/busses/i2c-mt65xx.c @@ -431,6 +431,19 @@ static const struct mtk_i2c_compatible mt8168_compat = { .max_dma_support = 33, }; +static const struct mtk_i2c_compatible mt7986_compat = { + .quirks = &mt7622_i2c_quirks, + .regs = mt_i2c_regs_v1, + .pmic_i2c = 0, + .dcm = 1, + .auto_restart = 1, + .aux_len_reg = 1, + .timing_adjust = 0, + .dma_sync = 1, + .ltiming_adjust = 0, + .max_dma_support = 32, +}; + static const struct mtk_i2c_compatible mt8173_compat = { .regs = mt_i2c_regs_v1, .pmic_i2c = 0, @@ -503,6 +516,7 @@ static const struct of_device_id mtk_i2c_of_match[] = { { .compatible = "mediatek,mt6577-i2c", .data = &mt6577_compat }, { .compatible = "mediatek,mt6589-i2c", .data = &mt6589_compat }, { .compatible = "mediatek,mt7622-i2c", .data = &mt7622_compat }, + { .compatible = "mediatek,mt7986-i2c", .data = &mt7986_compat }, { .compatible = "mediatek,mt8168-i2c", .data = &mt8168_compat }, { .compatible = "mediatek,mt8173-i2c", .data = &mt8173_compat }, { .compatible = "mediatek,mt8183-i2c", .data = &mt8183_compat }, diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c index 83457359ec450ed80e243b8f2feb7b1f931d93a1..38d5864d0cb5bd8d9b4baeef7f3c87693b44e2f1 100644 --- a/drivers/i2c/busses/i2c-npcm7xx.c +++ b/drivers/i2c/busses/i2c-npcm7xx.c @@ -106,7 +106,7 @@ enum i2c_addr { #define NPCM_I2CCST3 0x19 #define I2C_VER 0x1F -/*BANK0 regs*/ +/* BANK 0 regs */ #define NPCM_I2CADDR3 0x10 #define NPCM_I2CADDR7 0x11 #define NPCM_I2CADDR4 0x12 @@ -115,6 +115,20 @@ enum i2c_addr { #define NPCM_I2CADDR9 0x15 #define NPCM_I2CADDR6 0x16 #define NPCM_I2CADDR10 0x17 +#define NPCM_I2CCTL4 0x1A +#define NPCM_I2CCTL5 0x1B +#define NPCM_I2CSCLLT 0x1C /* SCL Low Time */ +#define NPCM_I2CFIF_CTL 0x1D /* FIFO Control */ +#define NPCM_I2CSCLHT 0x1E /* SCL High Time */ + +/* BANK 1 regs */ +#define NPCM_I2CFIF_CTS 0x10 /* Both FIFOs Control and Status */ +#define NPCM_I2CTXF_CTL 0x12 /* Tx-FIFO Control */ +#define NPCM_I2CT_OUT 0x14 /* Bus T.O. */ +#define NPCM_I2CPEC 0x16 /* PEC Data */ +#define NPCM_I2CTXF_STS 0x1A /* Tx-FIFO Status */ +#define NPCM_I2CRXF_STS 0x1C /* Rx-FIFO Status */ +#define NPCM_I2CRXF_CTL 0x1E /* Rx-FIFO Control */ #if IS_ENABLED(CONFIG_I2C_SLAVE) /* @@ -131,66 +145,51 @@ static const int npcm_i2caddr[I2C_NUM_OWN_ADDR] = { }; #endif -#define NPCM_I2CCTL4 0x1A -#define NPCM_I2CCTL5 0x1B -#define NPCM_I2CSCLLT 0x1C /* SCL Low Time */ -#define NPCM_I2CFIF_CTL 0x1D /* FIFO Control */ -#define NPCM_I2CSCLHT 0x1E /* SCL High Time */ - -/* BANK 1 regs */ -#define NPCM_I2CFIF_CTS 0x10 /* Both FIFOs Control and Status */ -#define NPCM_I2CTXF_CTL 0x12 /* Tx-FIFO Control */ -#define NPCM_I2CT_OUT 0x14 /* Bus T.O. */ -#define NPCM_I2CPEC 0x16 /* PEC Data */ -#define NPCM_I2CTXF_STS 0x1A /* Tx-FIFO Status */ -#define NPCM_I2CRXF_STS 0x1C /* Rx-FIFO Status */ -#define NPCM_I2CRXF_CTL 0x1E /* Rx-FIFO Control */ - /* NPCM_I2CST reg fields */ -#define NPCM_I2CST_XMIT BIT(0) -#define NPCM_I2CST_MASTER BIT(1) -#define NPCM_I2CST_NMATCH BIT(2) -#define NPCM_I2CST_STASTR BIT(3) -#define NPCM_I2CST_NEGACK BIT(4) -#define NPCM_I2CST_BER BIT(5) -#define NPCM_I2CST_SDAST BIT(6) -#define NPCM_I2CST_SLVSTP BIT(7) +#define NPCM_I2CST_XMIT BIT(0) /* Transmit mode */ +#define NPCM_I2CST_MASTER BIT(1) /* Master mode */ +#define NPCM_I2CST_NMATCH BIT(2) /* New match */ +#define NPCM_I2CST_STASTR BIT(3) /* Stall after start */ +#define NPCM_I2CST_NEGACK BIT(4) /* Negative ACK */ +#define NPCM_I2CST_BER BIT(5) /* Bus error */ +#define NPCM_I2CST_SDAST BIT(6) /* SDA status */ +#define NPCM_I2CST_SLVSTP BIT(7) /* Slave stop */ /* NPCM_I2CCST reg fields */ -#define NPCM_I2CCST_BUSY BIT(0) -#define NPCM_I2CCST_BB BIT(1) -#define NPCM_I2CCST_MATCH BIT(2) -#define NPCM_I2CCST_GCMATCH BIT(3) -#define NPCM_I2CCST_TSDA BIT(4) -#define NPCM_I2CCST_TGSCL BIT(5) -#define NPCM_I2CCST_MATCHAF BIT(6) -#define NPCM_I2CCST_ARPMATCH BIT(7) +#define NPCM_I2CCST_BUSY BIT(0) /* Busy */ +#define NPCM_I2CCST_BB BIT(1) /* Bus busy */ +#define NPCM_I2CCST_MATCH BIT(2) /* Address match */ +#define NPCM_I2CCST_GCMATCH BIT(3) /* Global call match */ +#define NPCM_I2CCST_TSDA BIT(4) /* Test SDA line */ +#define NPCM_I2CCST_TGSCL BIT(5) /* Toggle SCL line */ +#define NPCM_I2CCST_MATCHAF BIT(6) /* Match address field */ +#define NPCM_I2CCST_ARPMATCH BIT(7) /* ARP address match */ /* NPCM_I2CCTL1 reg fields */ -#define NPCM_I2CCTL1_START BIT(0) -#define NPCM_I2CCTL1_STOP BIT(1) -#define NPCM_I2CCTL1_INTEN BIT(2) +#define NPCM_I2CCTL1_START BIT(0) /* Generate start condition */ +#define NPCM_I2CCTL1_STOP BIT(1) /* Generate stop condition */ +#define NPCM_I2CCTL1_INTEN BIT(2) /* Interrupt enable */ #define NPCM_I2CCTL1_EOBINTE BIT(3) #define NPCM_I2CCTL1_ACK BIT(4) -#define NPCM_I2CCTL1_GCMEN BIT(5) -#define NPCM_I2CCTL1_NMINTE BIT(6) -#define NPCM_I2CCTL1_STASTRE BIT(7) +#define NPCM_I2CCTL1_GCMEN BIT(5) /* Global call match enable */ +#define NPCM_I2CCTL1_NMINTE BIT(6) /* New match interrupt enable */ +#define NPCM_I2CCTL1_STASTRE BIT(7) /* Stall after start enable */ /* RW1S fields (inside a RW reg): */ #define NPCM_I2CCTL1_RWS \ (NPCM_I2CCTL1_START | NPCM_I2CCTL1_STOP | NPCM_I2CCTL1_ACK) /* npcm_i2caddr reg fields */ -#define NPCM_I2CADDR_A GENMASK(6, 0) -#define NPCM_I2CADDR_SAEN BIT(7) +#define NPCM_I2CADDR_A GENMASK(6, 0) /* Address */ +#define NPCM_I2CADDR_SAEN BIT(7) /* Slave address enable */ /* NPCM_I2CCTL2 reg fields */ -#define I2CCTL2_ENABLE BIT(0) -#define I2CCTL2_SCLFRQ6_0 GENMASK(7, 1) +#define I2CCTL2_ENABLE BIT(0) /* Module enable */ +#define I2CCTL2_SCLFRQ6_0 GENMASK(7, 1) /* Bits 0:6 of frequency divisor */ /* NPCM_I2CCTL3 reg fields */ -#define I2CCTL3_SCLFRQ8_7 GENMASK(1, 0) -#define I2CCTL3_ARPMEN BIT(2) +#define I2CCTL3_SCLFRQ8_7 GENMASK(1, 0) /* Bits 7:8 of frequency divisor */ +#define I2CCTL3_ARPMEN BIT(2) /* ARP match enable */ #define I2CCTL3_IDL_START BIT(3) #define I2CCTL3_400K_MODE BIT(4) #define I2CCTL3_BNK_SEL BIT(5) diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c index 9028ffb58cc079697f6acaed7714cba5cf0bbf27..7d54a9f34c74b5a3b074a469dca674eb286dd50d 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.c +++ b/drivers/i2c/busses/i2c-pasemi-core.c @@ -21,6 +21,7 @@ #define REG_MTXFIFO 0x00 #define REG_MRXFIFO 0x04 #define REG_SMSTA 0x14 +#define REG_IMASK 0x18 #define REG_CTL 0x1c #define REG_REV 0x28 @@ -66,6 +67,7 @@ static void pasemi_reset(struct pasemi_smbus *smbus) val |= CTL_EN; reg_write(smbus, REG_CTL, val); + reinit_completion(&smbus->irq_completion); } static void pasemi_smb_clear(struct pasemi_smbus *smbus) @@ -78,14 +80,21 @@ static void pasemi_smb_clear(struct pasemi_smbus *smbus) static int pasemi_smb_waitready(struct pasemi_smbus *smbus) { - int timeout = 10; + int timeout = 100; unsigned int status; - status = reg_read(smbus, REG_SMSTA); - - while (!(status & SMSTA_XEN) && timeout--) { - msleep(1); + if (smbus->use_irq) { + reinit_completion(&smbus->irq_completion); + reg_write(smbus, REG_IMASK, SMSTA_XEN | SMSTA_MTN); + wait_for_completion_timeout(&smbus->irq_completion, msecs_to_jiffies(100)); + reg_write(smbus, REG_IMASK, 0); status = reg_read(smbus, REG_SMSTA); + } else { + status = reg_read(smbus, REG_SMSTA); + while (!(status & SMSTA_XEN) && timeout--) { + msleep(1); + status = reg_read(smbus, REG_SMSTA); + } } /* Got NACK? */ @@ -344,10 +353,14 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) /* set up the sysfs linkage to our parent device */ smbus->adapter.dev.parent = smbus->dev; + smbus->use_irq = 0; + init_completion(&smbus->irq_completion); if (smbus->hw_rev != PASEMI_HW_REV_PCI) smbus->hw_rev = reg_read(smbus, REG_REV); + reg_write(smbus, REG_IMASK, 0); + pasemi_reset(smbus); error = devm_i2c_add_adapter(smbus->dev, &smbus->adapter); @@ -356,3 +369,12 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus) return 0; } + +irqreturn_t pasemi_irq_handler(int irq, void *dev_id) +{ + struct pasemi_smbus *smbus = dev_id; + + reg_write(smbus, REG_IMASK, 0); + complete(&smbus->irq_completion); + return IRQ_HANDLED; +} diff --git a/drivers/i2c/busses/i2c-pasemi-core.h b/drivers/i2c/busses/i2c-pasemi-core.h index 4655124a37f3a61bd7d0074c6d6f859317651de9..88821f4e8a9f8ac183b3d736d9b58de795bdc058 100644 --- a/drivers/i2c/busses/i2c-pasemi-core.h +++ b/drivers/i2c/busses/i2c-pasemi-core.h @@ -7,6 +7,7 @@ #include #include #include +#include #define PASEMI_HW_REV_PCI -1 @@ -16,6 +17,10 @@ struct pasemi_smbus { void __iomem *ioaddr; unsigned int clk_div; int hw_rev; + int use_irq; + struct completion irq_completion; }; int pasemi_i2c_common_probe(struct pasemi_smbus *smbus); + +irqreturn_t pasemi_irq_handler(int irq, void *dev_id); diff --git a/drivers/i2c/busses/i2c-pasemi-platform.c b/drivers/i2c/busses/i2c-pasemi-platform.c index 88a54aaf7e3c324a0595e2ff1c2080e6f353ce66..e35945a91dbef40b5c58301ca0da7d694fc4afa6 100644 --- a/drivers/i2c/busses/i2c-pasemi-platform.c +++ b/drivers/i2c/busses/i2c-pasemi-platform.c @@ -49,6 +49,7 @@ static int pasemi_platform_i2c_probe(struct platform_device *pdev) struct pasemi_smbus *smbus; u32 frequency; int error; + int irq_num; data = devm_kzalloc(dev, sizeof(struct pasemi_platform_i2c_data), GFP_KERNEL); @@ -82,6 +83,11 @@ static int pasemi_platform_i2c_probe(struct platform_device *pdev) if (error) goto out_clk_disable; + irq_num = platform_get_irq(pdev, 0); + error = devm_request_irq(smbus->dev, irq_num, pasemi_irq_handler, 0, "pasemi_apple_i2c", (void *)smbus); + + if (!error) + smbus->use_irq = 1; platform_set_drvdata(pdev, data); return 0; diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c index f614cade432bb40ef7bfe8a8aaf27c52e7c67e75..30e38bc8b6db81fb56f4ee33e18662d8993adb7a 100644 --- a/drivers/i2c/busses/i2c-pxa-pci.c +++ b/drivers/i2c/busses/i2c-pxa-pci.c @@ -105,7 +105,7 @@ static int ce4100_i2c_probe(struct pci_dev *dev, int i; struct ce4100_devices *sds; - ret = pci_enable_device_mem(dev); + ret = pcim_enable_device(dev); if (ret) return ret; @@ -114,10 +114,8 @@ static int ce4100_i2c_probe(struct pci_dev *dev, return -EINVAL; } sds = kzalloc(sizeof(*sds), GFP_KERNEL); - if (!sds) { - ret = -ENOMEM; - goto err_mem; - } + if (!sds) + return -ENOMEM; for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) { sds->pdev[i] = add_i2c_device(dev, i); @@ -133,8 +131,6 @@ static int ce4100_i2c_probe(struct pci_dev *dev, err_dev_add: kfree(sds); -err_mem: - pci_disable_device(dev); return ret; } diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c index 8fce98bb77ff974a23bebfc51e9189e7d11d592e..fd70794bfceecf0879cf8e39c0e569ba3e420894 100644 --- a/drivers/i2c/busses/i2c-qcom-geni.c +++ b/drivers/i2c/busses/i2c-qcom-geni.c @@ -88,6 +88,7 @@ struct geni_i2c_dev { int cur_wr; int cur_rd; spinlock_t lock; + struct clk *core_clk; u32 clk_freq_out; const struct geni_i2c_clk_fld *clk_fld; int suspended; @@ -100,6 +101,13 @@ struct geni_i2c_dev { bool abort_done; }; +struct geni_i2c_desc { + bool has_core_clk; + char *icc_ddr; + bool no_dma_support; + unsigned int tx_fifo_depth; +}; + struct geni_i2c_err_log { int err; const char *msg; @@ -763,6 +771,7 @@ static int geni_i2c_probe(struct platform_device *pdev) u32 proto, tx_depth, fifo_disable; int ret; struct device *dev = &pdev->dev; + const struct geni_i2c_desc *desc = NULL; gi2c = devm_kzalloc(dev, sizeof(*gi2c), GFP_KERNEL); if (!gi2c) @@ -775,6 +784,14 @@ static int geni_i2c_probe(struct platform_device *pdev) if (IS_ERR(gi2c->se.base)) return PTR_ERR(gi2c->se.base); + desc = device_get_match_data(&pdev->dev); + + if (desc && desc->has_core_clk) { + gi2c->core_clk = devm_clk_get(dev, "core"); + if (IS_ERR(gi2c->core_clk)) + return PTR_ERR(gi2c->core_clk); + } + gi2c->se.clk = devm_clk_get(dev, "se"); if (IS_ERR(gi2c->se.clk) && !has_acpi_companion(dev)) return PTR_ERR(gi2c->se.clk); @@ -818,7 +835,7 @@ static int geni_i2c_probe(struct platform_device *pdev) gi2c->adap.dev.of_node = dev->of_node; strscpy(gi2c->adap.name, "Geni-I2C", sizeof(gi2c->adap.name)); - ret = geni_icc_get(&gi2c->se, "qup-memory"); + ret = geni_icc_get(&gi2c->se, desc ? desc->icc_ddr : "qup-memory"); if (ret) return ret; /* @@ -828,12 +845,17 @@ static int geni_i2c_probe(struct platform_device *pdev) */ gi2c->se.icc_paths[GENI_TO_CORE].avg_bw = GENI_DEFAULT_BW; gi2c->se.icc_paths[CPU_TO_GENI].avg_bw = GENI_DEFAULT_BW; - gi2c->se.icc_paths[GENI_TO_DDR].avg_bw = Bps_to_icc(gi2c->clk_freq_out); + if (!desc || desc->icc_ddr) + gi2c->se.icc_paths[GENI_TO_DDR].avg_bw = Bps_to_icc(gi2c->clk_freq_out); ret = geni_icc_set_bw(&gi2c->se); if (ret) return ret; + ret = clk_prepare_enable(gi2c->core_clk); + if (ret) + return ret; + ret = geni_se_resources_on(&gi2c->se); if (ret) { dev_err(dev, "Error turning on resources %d\n", ret); @@ -843,10 +865,15 @@ static int geni_i2c_probe(struct platform_device *pdev) if (proto != GENI_SE_I2C) { dev_err(dev, "Invalid proto %d\n", proto); geni_se_resources_off(&gi2c->se); + clk_disable_unprepare(gi2c->core_clk); return -ENXIO; } - fifo_disable = readl_relaxed(gi2c->se.base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE; + if (desc && desc->no_dma_support) + fifo_disable = false; + else + fifo_disable = readl_relaxed(gi2c->se.base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE; + if (fifo_disable) { /* FIFO is disabled, so we can only use GPI DMA */ gi2c->gpi_mode = true; @@ -858,6 +885,16 @@ static int geni_i2c_probe(struct platform_device *pdev) } else { gi2c->gpi_mode = false; tx_depth = geni_se_get_tx_fifo_depth(&gi2c->se); + + /* I2C Master Hub Serial Elements doesn't have the HW_PARAM_0 register */ + if (!tx_depth && desc) + tx_depth = desc->tx_fifo_depth; + + if (!tx_depth) { + dev_err(dev, "Invalid TX FIFO depth\n"); + return -EINVAL; + } + gi2c->tx_wm = tx_depth - 1; geni_se_init(&gi2c->se, gi2c->tx_wm, tx_depth); geni_se_config_packing(&gi2c->se, BITS_PER_BYTE, @@ -866,6 +903,7 @@ static int geni_i2c_probe(struct platform_device *pdev) dev_dbg(dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth); } + clk_disable_unprepare(gi2c->core_clk); ret = geni_se_resources_off(&gi2c->se); if (ret) { dev_err(dev, "Error turning off resources %d\n", ret); @@ -931,6 +969,8 @@ static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev) gi2c->suspended = 1; } + clk_disable_unprepare(gi2c->core_clk); + return geni_icc_disable(&gi2c->se); } @@ -943,6 +983,10 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev) if (ret) return ret; + ret = clk_prepare_enable(gi2c->core_clk); + if (ret) + return ret; + ret = geni_se_resources_on(&gi2c->se); if (ret) return ret; @@ -981,8 +1025,16 @@ static const struct dev_pm_ops geni_i2c_pm_ops = { NULL) }; +const struct geni_i2c_desc i2c_master_hub = { + .has_core_clk = true, + .icc_ddr = NULL, + .no_dma_support = true, + .tx_fifo_depth = 16, +}; + static const struct of_device_id geni_i2c_dt_match[] = { { .compatible = "qcom,geni-i2c" }, + { .compatible = "qcom,geni-i2c-master-hub", .data = &i2c_master_hub }, {} }; MODULE_DEVICE_TABLE(of, geni_i2c_dt_match); diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c index 3869c258a52965b8e71013e96fae23991cda4551..6aab84c8d22b49368d77cd2068c472490a45cf7c 100644 --- a/drivers/i2c/busses/i2c-tegra.c +++ b/drivers/i2c/busses/i2c-tegra.c @@ -1830,6 +1830,7 @@ static int tegra_i2c_probe(struct platform_device *pdev) i2c_dev->adapter.class = I2C_CLASS_DEPRECATED; i2c_dev->adapter.algo = &tegra_i2c_algo; i2c_dev->adapter.nr = pdev->id; + ACPI_COMPANION_SET(&i2c_dev->adapter.dev, ACPI_COMPANION(&pdev->dev)); if (i2c_dev->hw->supports_bus_clear) i2c_dev->adapter.bus_recovery_info = &tegra_i2c_recovery_info; diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c index 277a02455cddd86ce71a85279fcf94218f53094a..bee5a2ef1f229d280d2e55d67267604145f030c6 100644 --- a/drivers/i2c/busses/i2c-xiic.c +++ b/drivers/i2c/busses/i2c-xiic.c @@ -858,11 +858,14 @@ static int xiic_i2c_remove(struct platform_device *pdev) /* remove adapter & data */ i2c_del_adapter(&i2c->adap); - ret = pm_runtime_resume_and_get(i2c->dev); + ret = pm_runtime_get_sync(i2c->dev); + if (ret < 0) - return ret; + dev_warn(&pdev->dev, "Failed to activate device for removal (%pe)\n", + ERR_PTR(ret)); + else + xiic_deinit(i2c); - xiic_deinit(i2c); pm_runtime_put_sync(i2c->dev); clk_disable_unprepare(i2c->clk); pm_runtime_disable(&pdev->dev); diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index 13fafb74bab8d947c08c84e6ff823972cf3ebc6c..087e480b624cb2dde420183fb80a8651c27d8101 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -1018,15 +1018,14 @@ static const struct i2c_device_id dummy_id[] = { { }, }; -static int dummy_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int dummy_probe(struct i2c_client *client) { return 0; } static struct i2c_driver dummy_driver = { .driver.name = "dummy", - .probe = dummy_probe, + .probe_new = dummy_probe, .id_table = dummy_id, }; diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c index 4abc2d9198815498ede3775eb8789847ccdcbae1..5f25f23c4ff8cb3d3573c39f5f52118bb84e4d05 100644 --- a/drivers/i2c/i2c-slave-eeprom.c +++ b/drivers/i2c/i2c-slave-eeprom.c @@ -140,8 +140,9 @@ static int i2c_slave_init_eeprom_data(struct eeprom_data *eeprom, struct i2c_cli return 0; } -static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_device_id *id) +static int i2c_slave_eeprom_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct eeprom_data *eeprom; int ret; unsigned int size = FIELD_GET(I2C_SLAVE_BYTELEN, id->driver_data) + 1; @@ -206,7 +207,7 @@ static struct i2c_driver i2c_slave_eeprom_driver = { .driver = { .name = "i2c-slave-eeprom", }, - .probe = i2c_slave_eeprom_probe, + .probe_new = i2c_slave_eeprom_probe, .remove = i2c_slave_eeprom_remove, .id_table = i2c_slave_eeprom_id, }; diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 07c92c8495a3c895551507448e11661d8a4ee691..cd19546d31fcb0f9735f432ee9f15c86d2998b01 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -112,8 +112,7 @@ static void smbalert_work(struct work_struct *work) } /* Setup SMBALERT# infrastructure */ -static int smbalert_probe(struct i2c_client *ara, - const struct i2c_device_id *id) +static int smbalert_probe(struct i2c_client *ara) { struct i2c_smbus_alert_setup *setup = dev_get_platdata(&ara->dev); struct i2c_smbus_alert *alert; @@ -170,7 +169,7 @@ static struct i2c_driver smbalert_driver = { .driver = { .name = "smbus_alert", }, - .probe = smbalert_probe, + .probe_new = smbalert_probe, .remove = smbalert_remove, .id_table = smbalert_ids, }; @@ -361,9 +360,15 @@ void i2c_register_spd(struct i2c_adapter *adap) return; } + /* + * Memory types could be found at section 7.18.2 (Memory Device — Type), table 78 + * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.6.0.pdf + */ switch (common_mem_type) { + case 0x12: /* DDR */ case 0x13: /* DDR2 */ case 0x18: /* DDR3 */ + case 0x1B: /* LPDDR */ case 0x1C: /* LPDDR2 */ case 0x1D: /* LPDDR3 */ name = "spd"; diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c index ea83de78f52db626f2c273ddf8baa29874943e29..09d1d9e67e31f39e5b863ced5e9a43e40de23312 100644 --- a/drivers/i2c/muxes/i2c-mux-pca9541.c +++ b/drivers/i2c/muxes/i2c-mux-pca9541.c @@ -283,8 +283,7 @@ static int pca9541_release_chan(struct i2c_mux_core *muxc, u32 chan) /* * I2C init/probing/exit functions */ -static int pca9541_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pca9541_probe(struct i2c_client *client) { struct i2c_adapter *adap = client->adapter; struct i2c_mux_core *muxc; @@ -337,7 +336,7 @@ static struct i2c_driver pca9541_driver = { .name = "pca9541", .of_match_table = of_match_ptr(pca9541_of_match), }, - .probe = pca9541_probe, + .probe_new = pca9541_probe, .remove = pca9541_remove, .id_table = pca9541_id, }; diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c index a5f458b635df6c3e45df5c971875b3bd50f9c2e5..3639e6d7304cd2a13ce83420ab64cc474bf1a044 100644 --- a/drivers/i2c/muxes/i2c-mux-pca954x.c +++ b/drivers/i2c/muxes/i2c-mux-pca954x.c @@ -411,9 +411,9 @@ static int pca954x_init(struct i2c_client *client, struct pca954x *data) /* * I2C init/probing/exit functions */ -static int pca954x_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int pca954x_probe(struct i2c_client *client) { + const struct i2c_device_id *id = i2c_client_get_device_id(client); struct i2c_adapter *adap = client->adapter; struct device *dev = &client->dev; struct gpio_desc *gpio; @@ -554,7 +554,7 @@ static struct i2c_driver pca954x_driver = { .pm = &pca954x_pm, .of_match_table = pca954x_of_match, }, - .probe = pca954x_probe, + .probe_new = pca954x_probe, .remove = pca954x_remove, .id_table = pca954x_id, }; diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c index 0e0679f65cf77e75b9857d7a20d602c0e70da2aa..30a6de1694e0791ea6d70b75b4c35df2ff7cc0d0 100644 --- a/drivers/i2c/muxes/i2c-mux-reg.c +++ b/drivers/i2c/muxes/i2c-mux-reg.c @@ -183,13 +183,12 @@ static int i2c_mux_reg_probe(struct platform_device *pdev) if (!mux->data.reg) { dev_info(&pdev->dev, "Register not set, using platform resource\n"); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mux->data.reg_size = resource_size(res); - mux->data.reg = devm_ioremap_resource(&pdev->dev, res); + mux->data.reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res); if (IS_ERR(mux->data.reg)) { ret = PTR_ERR(mux->data.reg); goto err_put_parent; } + mux->data.reg_size = resource_size(res); } if (mux->data.reg_size != 4 && mux->data.reg_size != 2 && diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c index e92d3e9a52bd55dedc1cd4b3f83d6721a1c97025..9762630b917ea55f86a4241d35974c23892bd77a 100644 --- a/drivers/i3c/device.c +++ b/drivers/i3c/device.c @@ -50,6 +50,26 @@ int i3c_device_do_priv_xfers(struct i3c_device *dev, } EXPORT_SYMBOL_GPL(i3c_device_do_priv_xfers); +/** + * i3c_device_do_setdasa() - do I3C dynamic address assignement with + * static address + * + * @dev: device with which the DAA should be done + * + * Return: 0 in case of success, a negative error core otherwise. + */ +int i3c_device_do_setdasa(struct i3c_device *dev) +{ + int ret; + + i3c_bus_normaluse_lock(dev->bus); + ret = i3c_dev_setdasa_locked(dev->desc); + i3c_bus_normaluse_unlock(dev->bus); + + return ret; +} +EXPORT_SYMBOL_GPL(i3c_device_do_setdasa); + /** * i3c_device_get_info() - get I3C device information * diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h index 86b7b44cfca28be75fdc028f0c5a7460dd552d25..908a807badaf9c0266bc33756d66091b1ee8368c 100644 --- a/drivers/i3c/internals.h +++ b/drivers/i3c/internals.h @@ -15,6 +15,7 @@ extern struct bus_type i3c_bus_type; void i3c_bus_normaluse_lock(struct i3c_bus *bus); void i3c_bus_normaluse_unlock(struct i3c_bus *bus); +int i3c_dev_setdasa_locked(struct i3c_dev_desc *dev); int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, struct i3c_priv_xfer *xfers, int nxfers); diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c index 351c81a929a6c914b21d153d7587fe40dc695c0b..d7e6f6c99aea558f7781de8c34f354aeb0add48b 100644 --- a/drivers/i3c/master.c +++ b/drivers/i3c/master.c @@ -2708,6 +2708,25 @@ int i3c_master_unregister(struct i3c_master_controller *master) } EXPORT_SYMBOL_GPL(i3c_master_unregister); +int i3c_dev_setdasa_locked(struct i3c_dev_desc *dev) +{ + struct i3c_master_controller *master; + + if (!dev) + return -ENOENT; + + master = i3c_dev_get_master(dev); + if (!master) + return -EINVAL; + + if (!dev->boardinfo || !dev->boardinfo->init_dyn_addr || + !dev->boardinfo->static_addr) + return -EINVAL; + + return i3c_master_setdasa_locked(master, dev->info.static_addr, + dev->boardinfo->init_dyn_addr); +} + int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev, struct i3c_priv_xfer *xfers, int nxfers) diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig index aa36ac618e7291067aa218a8f5bce35e8266e021..a5827d11e9346a890c55804052c9bfa21076dde1 100644 --- a/drivers/infiniband/Kconfig +++ b/drivers/infiniband/Kconfig @@ -78,6 +78,7 @@ config INFINIBAND_VIRT_DMA def_bool !HIGHMEM if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS +if !UML source "drivers/infiniband/hw/bnxt_re/Kconfig" source "drivers/infiniband/hw/cxgb4/Kconfig" source "drivers/infiniband/hw/efa/Kconfig" @@ -85,6 +86,7 @@ source "drivers/infiniband/hw/erdma/Kconfig" source "drivers/infiniband/hw/hfi1/Kconfig" source "drivers/infiniband/hw/hns/Kconfig" source "drivers/infiniband/hw/irdma/Kconfig" +source "drivers/infiniband/hw/mana/Kconfig" source "drivers/infiniband/hw/mlx4/Kconfig" source "drivers/infiniband/hw/mlx5/Kconfig" source "drivers/infiniband/hw/mthca/Kconfig" @@ -94,6 +96,7 @@ source "drivers/infiniband/hw/qib/Kconfig" source "drivers/infiniband/hw/usnic/Kconfig" source "drivers/infiniband/hw/vmw_pvrdma/Kconfig" source "drivers/infiniband/sw/rdmavt/Kconfig" +endif # !UML source "drivers/infiniband/sw/rxe/Kconfig" source "drivers/infiniband/sw/siw/Kconfig" endif diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 4084d05a45102dbbedaacf6e38d09d8231f53bac..2e91d88793265800acb86f6e8b9b320ccbc16be3 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -1422,7 +1422,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr, *vlan_id = vlan_dev_vlan_id(ndev); } else { /* If the netdev is upper device and if it's lower - * device is vlan device, consider vlan id of the + * device is vlan device, consider vlan id of * the lower vlan device for this gid entry. */ netdev_walk_all_lower_dev_rcu(attr->ndev, diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index 1f9938a2c47522b8255239243edd26c7dacf402a..603c0aecc361444d475085016ff6cb60bab620f4 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -4094,9 +4094,18 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PKEY_INDEX | IB_QP_PORT; qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; - if (cm_id_priv->responder_resources) + if (cm_id_priv->responder_resources) { + struct ib_device *ib_dev = cm_id_priv->id.device; + u64 support_flush = ib_dev->attrs.device_cap_flags & + (IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT); + u32 flushable = support_flush ? + (IB_ACCESS_FLUSH_GLOBAL | + IB_ACCESS_FLUSH_PERSISTENT) : 0; + qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ | - IB_ACCESS_REMOTE_ATOMIC; + IB_ACCESS_REMOTE_ATOMIC | + flushable; + } qp_attr->pkey_index = cm_id_priv->av.pkey_index; if (cm_id_priv->av.port) qp_attr->port_num = cm_id_priv->av.port->port_num; diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index aacd6254df77aa73aac87ee6af66abace597a37f..68721ff10255e2c60b15f9c3b5f4b66c73b1525e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -47,7 +47,7 @@ MODULE_LICENSE("Dual BSD/GPL"); #define CMA_CM_RESPONSE_TIMEOUT 20 #define CMA_MAX_CM_RETRIES 15 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24) -#define CMA_IBOE_PACKET_LIFETIME 18 +#define CMA_IBOE_PACKET_LIFETIME 16 #define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP static const char * const cma_events[] = { diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index b69e2c4e4d2a40443966836e602641a2767cf796..894c06846224e69ad4a28e86fb119aa9b97cd28c 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -2159,14 +2159,16 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev, return 0; } + if (old_ndev) + netdev_tracker_free(ndev, &pdata->netdev_tracker); if (ndev) - dev_hold(ndev); + netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC); rcu_assign_pointer(pdata->netdev, ndev); spin_unlock_irqrestore(&pdata->netdev_lock, flags); add_ndev_hash(pdata); if (old_ndev) - dev_put(old_ndev); + __dev_put(old_ndev); return 0; } @@ -2199,7 +2201,7 @@ static void free_netdevs(struct ib_device *ib_dev) * comparisons after the put */ rcu_assign_pointer(pdata->netdev, NULL); - dev_put(ndev); + netdev_put(ndev, &pdata->netdev_tracker); } spin_unlock_irqrestore(&pdata->netdev_lock, flags); } @@ -2851,8 +2853,8 @@ err: static void __exit ib_core_cleanup(void) { roce_gid_mgmt_cleanup(); - nldev_exit(); rdma_nl_unregister(RDMA_NL_LS); + nldev_exit(); unregister_pernet_device(&rdma_dev_net_ops); unregister_blocking_lsm_notifier(&ibdev_lsm_nb); ib_sa_cleanup(); diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 1893aa613ad734f096f0b9b3eaee346fc47ae954..674344eb8e2f48562f3c49694fd5bc86515a8e36 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, struct ib_mad_qp_info *qp_info, struct trace_event_raw_ib_mad_send_template *entry) { - u16 pkey; - struct ib_device *dev = qp_info->port_priv->device; - u32 pnum = qp_info->port_priv->port_num; struct ib_ud_wr *wr = &mad_send_wr->send_wr; struct rdma_ah_attr attr = {}; @@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr, /* These are common */ entry->sl = attr.sl; - ib_query_pkey(dev, pnum, wr->pkey_index, &pkey); - entry->pkey = pkey; entry->rqpn = wr->remote_qpn; entry->rqkey = wr->remote_qkey; entry->dlid = rdma_ah_get_dlid(&attr); diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index 12dc97067ed2b8e5c22e023101247efb4ba78ef9..d5d3e4f0de779ef1e82f06482287b3de355323e1 100644 --- a/drivers/infiniband/core/nldev.c +++ b/drivers/infiniband/core/nldev.c @@ -513,7 +513,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin, /* In create_qp() port is not set yet */ if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port)) - return -EINVAL; + return -EMSGSIZE; ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num); if (ret) @@ -552,7 +552,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin, struct rdma_cm_id *cm_id = &id_priv->id; if (port && port != cm_id->port_num) - return 0; + return -EAGAIN; if (cm_id->port_num && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num)) @@ -894,6 +894,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg, int ret = 0; table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP); + if (!table_attr) + return -EMSGSIZE; rt = &counter->device->res[RDMA_RESTRACK_QP]; xa_lock(&rt->xa); @@ -1041,6 +1043,10 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto err_free; + } err = fill_dev_info(msg, device); if (err) @@ -1126,7 +1132,7 @@ static int _nldev_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, NLM_F_MULTI); - if (fill_dev_info(skb, device)) { + if (!nlh || fill_dev_info(skb, device)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1185,6 +1191,10 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto err_free; + } err = fill_port_info(msg, device, port, sock_net(skb->sk)); if (err) @@ -1246,7 +1256,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb, RDMA_NLDEV_CMD_PORT_GET), 0, NLM_F_MULTI); - if (fill_port_info(skb, device, p, sock_net(skb->sk))) { + if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) { nlmsg_cancel(skb, nlh); goto out; } @@ -1288,6 +1298,10 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_free; + } ret = fill_res_info(msg, device); if (ret) @@ -1319,7 +1333,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET), 0, NLM_F_MULTI); - if (fill_res_info(skb, device)) { + if (!nlh || fill_res_info(skb, device)) { nlmsg_cancel(skb, nlh); goto out; } @@ -1454,7 +1468,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_OP(nlh->nlmsg_type)), 0, 0); - if (fill_nldev_handle(msg, device)) { + if (!nlh || fill_nldev_handle(msg, device)) { ret = -EMSGSIZE; goto err_free; } @@ -1533,7 +1547,7 @@ static int res_get_common_dumpit(struct sk_buff *skb, RDMA_NL_GET_OP(cb->nlh->nlmsg_type)), 0, NLM_F_MULTI); - if (fill_nldev_handle(skb, device)) { + if (!nlh || fill_nldev_handle(skb, device)) { ret = -EMSGSIZE; goto err; } @@ -1795,6 +1809,10 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET_CHARDEV), 0, 0); + if (!nlh) { + err = -EMSGSIZE; + goto out_nlmsg; + } data.nl_msg = msg; err = ib_get_client_nl_info(ibdev, client_name, &data); @@ -1852,6 +1870,10 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_SYS_GET), 0, 0); + if (!nlh) { + nlmsg_free(msg); + return -EMSGSIZE; + } err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE, (u8)ib_devices_shared_netns); @@ -2032,7 +2054,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_free_msg; @@ -2101,6 +2123,10 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_SET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_fill; + } cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]); qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]); @@ -2171,7 +2197,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb, RDMA_NLDEV_CMD_STAT_GET), 0, 0); - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) { ret = -EMSGSIZE; goto err_msg; @@ -2259,6 +2285,10 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh, RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET), 0, 0); + if (!nlh) { + ret = -EMSGSIZE; + goto err_msg; + } ret = rdma_counter_get_mode(device, port, &mode, &mask); if (ret) @@ -2391,7 +2421,7 @@ static int nldev_stat_get_counter_status_doit(struct sk_buff *skb, 0, 0); ret = -EMSGSIZE; - if (fill_nldev_handle(msg, device) || + if (!nlh || fill_nldev_handle(msg, device) || nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) goto err_msg; diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c index 1f935d9f6178548548ddfe624d95b9e4ca76ecc7..01a499a8b88dbd5474542bc1c08ac0180f699777 100644 --- a/drivers/infiniband/core/restrack.c +++ b/drivers/infiniband/core/restrack.c @@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res) rt = &dev->res[res->type]; old = xa_erase(&rt->xa, res->id); - if (res->type == RDMA_RESTRACK_MR) - return; WARN_ON(old != res); out: diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 84c53bd2a52db9f034d2cca76ea4a7c044651eca..ee59d7391568993ac24f3126d12842f2044d67ad 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1213,6 +1213,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, p->port_num = port_num; kobject_init(&p->kobj, &port_type); + if (device->port_data && is_full_dev) + device->port_data[port_num].sysfs = p; + cur_group = p->groups_list; ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list, attr->gid_tbl_len, show_port_gid); @@ -1258,9 +1261,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num, } list_add_tail(&p->kobj.entry, &coredev->port_list); - if (device->port_data && is_full_dev) - device->port_data[port_num].sysfs = p; - return p; err_groups: @@ -1268,6 +1268,8 @@ err_groups: err_del: kobject_del(&p->kobj); err_put: + if (device->port_data && is_full_dev) + device->port_data[port_num].sysfs = NULL; kobject_put(&p->kobj); return ERR_PTR(ret); } @@ -1276,14 +1278,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port) { bool is_full_dev = &port->ibdev->coredev == coredev; - if (port->ibdev->port_data && - port->ibdev->port_data[port->port_num].sysfs == port) - port->ibdev->port_data[port->port_num].sysfs = NULL; list_del(&port->kobj.entry); if (is_full_dev) sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups); + sysfs_remove_groups(&port->kobj, port->groups_list); kobject_del(&port->kobj); + + if (port->ibdev->port_data && + port->ibdev->port_data[port->port_num].sysfs == port) + port->ibdev->port_data[port->port_num].sysfs = NULL; + kobject_put(&port->kobj); } diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c index dd1075466f61be67006017192a7edaf901abe052..7b4773fa4bc0bda36693465422ceac28b735116f 100644 --- a/drivers/infiniband/core/uverbs_std_types_qp.c +++ b/drivers/infiniband/core/uverbs_std_types_qp.c @@ -163,7 +163,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)( UVERBS_ATTR_CREATE_QP_SRQ_HANDLE)) return -EINVAL; - /* send_cq is optinal */ + /* send_cq is optional */ if (cap.max_send_wr) { send_cq = uverbs_attr_get_obj(attrs, UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE); diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile index 6b3a88046125adfa58944a22b33534f10a35d2fd..1211f4317a9f4fdab32278e2000c1b2b392e64d5 100644 --- a/drivers/infiniband/hw/Makefile +++ b/drivers/infiniband/hw/Makefile @@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_QIB) += qib/ obj-$(CONFIG_INFINIBAND_CXGB4) += cxgb4/ obj-$(CONFIG_INFINIBAND_EFA) += efa/ obj-$(CONFIG_INFINIBAND_IRDMA) += irdma/ +obj-$(CONFIG_MANA_INFINIBAND) += mana/ obj-$(CONFIG_MLX4_INFINIBAND) += mlx4/ obj-$(CONFIG_MLX5_INFINIBAND) += mlx5/ obj-$(CONFIG_INFINIBAND_OCRDMA) += ocrdma/ diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h index 730783fbc8949217c7e4af3ebfc170411145495b..3d8c11aa23a26fd6847f76cb3293fa2795d6ec96 100644 --- a/drivers/infiniband/hw/erdma/erdma.h +++ b/drivers/infiniband/hw/erdma/erdma.h @@ -124,6 +124,7 @@ struct erdma_devattr { u32 fw_version; unsigned char peer_addr[ETH_ALEN]; + unsigned long cap_flags; int numa_node; enum erdma_cc_alg cc; @@ -189,6 +190,7 @@ struct erdma_dev { struct net_device *netdev; struct pci_dev *pdev; struct notifier_block netdev_nb; + struct workqueue_struct *reflush_wq; resource_size_t func_bar_addr; resource_size_t func_bar_len; @@ -218,7 +220,7 @@ struct erdma_dev { DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT); /* * We provide max 496 uContexts that each has one SQ normal Db, - * and one directWQE db。 + * and one directWQE db. */ DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT); diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c index 58e0dc5c75d1d26ae30c1536b7576d8fafca251a..cabd8678b3558963b3069304ba5372ea0233b3a0 100644 --- a/drivers/infiniband/hw/erdma/erdma_cq.c +++ b/drivers/infiniband/hw/erdma/erdma_cq.c @@ -64,6 +64,8 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = { [ERDMA_OP_REG_MR] = IB_WC_REG_MR, [ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV, [ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ, + [ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP, + [ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD, }; static const struct { diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h index e788887732e1f5e62185cc244d675ae99cebb1f4..ab371fec610c32e3ad236bac9828d4bd8098582f 100644 --- a/drivers/infiniband/hw/erdma/erdma_hw.h +++ b/drivers/infiniband/hw/erdma/erdma_hw.h @@ -145,6 +145,7 @@ enum CMDQ_RDMA_OPCODE { CMDQ_OPCODE_MODIFY_QP = 3, CMDQ_OPCODE_CREATE_CQ = 4, CMDQ_OPCODE_DESTROY_CQ = 5, + CMDQ_OPCODE_REFLUSH = 6, CMDQ_OPCODE_REG_MR = 8, CMDQ_OPCODE_DEREG_MR = 9 }; @@ -224,8 +225,7 @@ struct erdma_cmdq_create_cq_req { /* regmr cfg1 */ #define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12) #define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6) -#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2) -#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0) +#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1) /* regmr cfg2 */ #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27) @@ -302,8 +302,16 @@ struct erdma_cmdq_destroy_qp_req { u32 qpn; }; +struct erdma_cmdq_reflush_req { + u64 hdr; + u32 qpn; + u32 sq_pi; + u32 rq_pi; +}; + /* cap qword 0 definition */ #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40) +#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24) #define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16) #define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0) @@ -315,6 +323,10 @@ struct erdma_cmdq_destroy_qp_req { #define ERDMA_NQP_PER_QBLOCK 1024 +enum { + ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7, +}; + #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0) /* CQE hdr */ @@ -340,9 +352,9 @@ struct erdma_cqe { }; struct erdma_sge { - __aligned_le64 laddr; + __aligned_le64 addr; __le32 length; - __le32 lkey; + __le32 key; }; /* Receive Queue Element */ @@ -370,8 +382,7 @@ struct erdma_rqe { #define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0) /* REG MR attrs */ -#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0) -#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2) +#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1) #define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6) #define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12) @@ -410,6 +421,16 @@ struct erdma_readreq_sqe { __le32 rsvd; }; +struct erdma_atomic_sqe { + __le64 hdr; + __le64 rsvd; + __le64 fetchadd_swap_data; + __le64 cmp_data; + + struct erdma_sge remote; + struct erdma_sge sgl; +}; + struct erdma_reg_mr_sqe { __le64 hdr; __le64 addr; @@ -469,7 +490,9 @@ enum erdma_opcode { ERDMA_OP_REG_MR = 14, ERDMA_OP_LOCAL_INV = 15, ERDMA_OP_READ_WITH_INV = 16, - ERDMA_NUM_OPCODES = 17, + ERDMA_OP_ATOMIC_CAS = 17, + ERDMA_OP_ATOMIC_FAD = 18, + ERDMA_NUM_OPCODES = 19, ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1 }; diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c index 49778bb294ae4386095bddf08149454f70875d07..5dc31e5df5cba78ca1025120186fbe891e4551c3 100644 --- a/drivers/infiniband/hw/erdma/erdma_main.c +++ b/drivers/infiniband/hw/erdma/erdma_main.c @@ -374,6 +374,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev) dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1); dev->attrs.max_mr = dev->attrs.max_qp << 1; dev->attrs.max_cq = dev->attrs.max_qp << 1; + dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0); dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR; dev->attrs.max_ord = ERDMA_MAX_ORD; @@ -520,13 +521,22 @@ static int erdma_ib_device_add(struct pci_dev *pdev) u64_to_ether_addr(mac, dev->attrs.peer_addr); + dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND, + WQ_UNBOUND_MAX_ACTIVE); + if (!dev->reflush_wq) { + ret = -ENOMEM; + goto err_alloc_workqueue; + } + ret = erdma_device_register(dev); if (ret) - goto err_out; + goto err_register; return 0; -err_out: +err_register: + destroy_workqueue(dev->reflush_wq); +err_alloc_workqueue: xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); @@ -542,6 +552,7 @@ static void erdma_ib_device_remove(struct pci_dev *pdev) unregister_netdevice_notifier(&dev->netdev_nb); ib_unregister_device(&dev->ibdev); + destroy_workqueue(dev->reflush_wq); erdma_res_cb_free(dev); xa_destroy(&dev->qp_xa); xa_destroy(&dev->cq_xa); diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c index 5fe1a339a43543904f528dacbec6d748d70498e3..d088d6bef431afa8c6936219dd2f60a6ce46942e 100644 --- a/drivers/infiniband/hw/erdma/erdma_qp.c +++ b/drivers/infiniband/hw/erdma/erdma_qp.c @@ -120,6 +120,7 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp, int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, enum erdma_qp_attr_mask mask) { + bool need_reflush = false; int drop_conn, ret = 0; if (!mask) @@ -135,6 +136,7 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, ret = erdma_modify_qp_state_to_rts(qp, attrs, mask); } else if (attrs->state == ERDMA_QP_STATE_ERROR) { qp->attrs.state = ERDMA_QP_STATE_ERROR; + need_reflush = true; if (qp->cep) { erdma_cep_put(qp->cep); qp->cep = NULL; @@ -145,17 +147,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, case ERDMA_QP_STATE_RTS: drop_conn = 0; - if (attrs->state == ERDMA_QP_STATE_CLOSING) { + if (attrs->state == ERDMA_QP_STATE_CLOSING || + attrs->state == ERDMA_QP_STATE_TERMINATE || + attrs->state == ERDMA_QP_STATE_ERROR) { ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); drop_conn = 1; - } else if (attrs->state == ERDMA_QP_STATE_TERMINATE) { - qp->attrs.state = ERDMA_QP_STATE_TERMINATE; - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); - drop_conn = 1; - } else if (attrs->state == ERDMA_QP_STATE_ERROR) { - ret = erdma_modify_qp_state_to_stop(qp, attrs, mask); - qp->attrs.state = ERDMA_QP_STATE_ERROR; - drop_conn = 1; + need_reflush = true; } if (drop_conn) @@ -180,6 +177,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs, break; } + if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) { + qp->flags |= ERDMA_QP_IN_FLUSHING; + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + } + return ret; } @@ -285,15 +288,16 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset; u32 idx = *pi & (qp->attrs.sq_size - 1); enum ib_wr_opcode op = send_wr->opcode; + struct erdma_atomic_sqe *atomic_sqe; struct erdma_readreq_sqe *read_sqe; struct erdma_reg_mr_sqe *regmr_sge; struct erdma_write_sqe *write_sqe; struct erdma_send_sqe *send_sqe; struct ib_rdma_wr *rdma_wr; - struct erdma_mr *mr; + struct erdma_sge *sge; __le32 *length_field; + struct erdma_mr *mr; u64 wqe_hdr, *entry; - struct ib_sge *sge; u32 attrs; int ret; @@ -360,9 +364,9 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1, qp->attrs.sq_size, SQEBB_SHIFT); - sge->addr = rdma_wr->remote_addr; - sge->lkey = rdma_wr->rkey; - sge->length = send_wr->sg_list[0].length; + sge->addr = cpu_to_le64(rdma_wr->remote_addr); + sge->key = cpu_to_le32(rdma_wr->rkey); + sge->length = cpu_to_le32(send_wr->sg_list[0].length); wqe_size = sizeof(struct erdma_readreq_sqe) + send_wr->num_sge * sizeof(struct ib_sge); @@ -397,8 +401,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, regmr_sge->addr = cpu_to_le64(mr->ibmr.iova); regmr_sge->length = cpu_to_le32(mr->ibmr.length); regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key); - attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) | - FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | + attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) | FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK, mr->mem.mtt_nents); @@ -424,6 +427,35 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi, regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey); wqe_size = sizeof(struct erdma_reg_mr_sqe); goto out; + case IB_WR_ATOMIC_CMP_AND_SWP: + case IB_WR_ATOMIC_FETCH_AND_ADD: + atomic_sqe = (struct erdma_atomic_sqe *)entry; + if (op == IB_WR_ATOMIC_CMP_AND_SWP) { + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, + ERDMA_OP_ATOMIC_CAS); + atomic_sqe->fetchadd_swap_data = + cpu_to_le64(atomic_wr(send_wr)->swap); + atomic_sqe->cmp_data = + cpu_to_le64(atomic_wr(send_wr)->compare_add); + } else { + wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK, + ERDMA_OP_ATOMIC_FAD); + atomic_sqe->fetchadd_swap_data = + cpu_to_le64(atomic_wr(send_wr)->compare_add); + } + + sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1, + qp->attrs.sq_size, SQEBB_SHIFT); + sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr); + sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey); + sge++; + + sge->addr = cpu_to_le64(send_wr->sg_list[0].addr); + sge->key = cpu_to_le32(send_wr->sg_list[0].lkey); + sge->length = cpu_to_le32(send_wr->sg_list[0].length); + + wqe_size = sizeof(*atomic_sqe); + goto out; default: return -EOPNOTSUPP; } @@ -498,6 +530,10 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr, } spin_unlock_irqrestore(&qp->lock, flags); + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + return ret; } @@ -551,5 +587,10 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr, } spin_unlock_irqrestore(&qp->lock, flags); + + if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING)) + mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork, + usecs_to_jiffies(100)); + return ret; } diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c index 62be98e2b94142f916f25fc9b7b602a59554deaa..5dab1e87975ba225c00f42a55a487c73bb2a5200 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.c +++ b/drivers/infiniband/hw/erdma/erdma_verbs.c @@ -118,8 +118,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr) FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8); req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) | FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) | - FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) | - FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0); + FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access); req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK, ilog2(mr->mem.page_size)) | FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) | @@ -289,6 +288,10 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr, attr->max_mw = dev->attrs.max_mw; attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA; attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT; + + if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC) + attr->atomic_cap = IB_ATOMIC_GLOB; + attr->fw_ver = dev->attrs.fw_version; if (dev->netdev) @@ -376,6 +379,21 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) return 0; } +static void erdma_flush_worker(struct work_struct *work) +{ + struct delayed_work *dwork = to_delayed_work(work); + struct erdma_qp *qp = + container_of(dwork, struct erdma_qp, reflush_dwork); + struct erdma_cmdq_reflush_req req; + + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, + CMDQ_OPCODE_REFLUSH); + req.qpn = QP_ID(qp); + req.sq_pi = qp->kern_qp.sq_pi; + req.rq_pi = qp->kern_qp.rq_pi; + erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL); +} + static int erdma_qp_validate_cap(struct erdma_dev *dev, struct ib_qp_init_attr *attrs) { @@ -732,6 +750,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs, qp->attrs.max_send_sge = attrs->cap.max_send_sge; qp->attrs.max_recv_sge = attrs->cap.max_recv_sge; qp->attrs.state = ERDMA_QP_STATE_IDLE; + INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker); ret = create_qp_cmd(dev, qp); if (ret) @@ -1025,6 +1044,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE); up_write(&qp->state_lock); + cancel_delayed_work_sync(&qp->reflush_dwork); + erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA, CMDQ_OPCODE_DESTROY_QP); req.qpn = QP_ID(qp); diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h index ab6380635e9e64b35f6f757df671ed9bb19af13e..e0a993bc032a44aaa76e399ead33ccf8f6f0734e 100644 --- a/drivers/infiniband/hw/erdma/erdma_verbs.h +++ b/drivers/infiniband/hw/erdma/erdma_verbs.h @@ -71,16 +71,18 @@ struct erdma_pd { #define ERDMA_MR_INLINE_MTT 0 #define ERDMA_MR_INDIRECT_MTT 1 -#define ERDMA_MR_ACC_LR BIT(0) -#define ERDMA_MR_ACC_LW BIT(1) -#define ERDMA_MR_ACC_RR BIT(2) -#define ERDMA_MR_ACC_RW BIT(3) +#define ERDMA_MR_ACC_RA BIT(0) +#define ERDMA_MR_ACC_LR BIT(1) +#define ERDMA_MR_ACC_LW BIT(2) +#define ERDMA_MR_ACC_RR BIT(3) +#define ERDMA_MR_ACC_RW BIT(4) static inline u8 to_erdma_access_flags(int access) { return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) | (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) | - (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0); + (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) | + (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0); } struct erdma_mem { @@ -171,6 +173,10 @@ enum erdma_qp_attr_mask { ERDMA_QP_ATTR_MPA = (1 << 7) }; +enum erdma_qp_flags { + ERDMA_QP_IN_FLUSHING = (1 << 0), +}; + struct erdma_qp_attrs { enum erdma_qp_state state; enum erdma_cc_alg cc; /* Congestion control algorithm */ @@ -195,6 +201,9 @@ struct erdma_qp { struct erdma_cep *cep; struct rw_semaphore state_lock; + unsigned long flags; + struct delayed_work reflush_dwork; + union { struct erdma_kqp kern_qp; struct erdma_uqp user_qp; diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c index 877f8e84a672a4ccd5278aef24b3582400f9e31a..77ee77d4000fbfdedde9978473a1865a998eec9b 100644 --- a/drivers/infiniband/hw/hfi1/affinity.c +++ b/drivers/infiniband/hw/hfi1/affinity.c @@ -177,6 +177,8 @@ out: for (node = 0; node < node_affinity.num_possible_nodes; node++) hfi1_per_node_cntr[node] = 1; + pci_dev_put(dev); + return 0; } diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c index 8e71bef9d982652fb5018ef877ddfe048eab426e..bcc6bc0540f0330caed530b3f47a5b836e4387c7 100644 --- a/drivers/infiniband/hw/hfi1/driver.c +++ b/drivers/infiniband/hw/hfi1/driver.c @@ -112,7 +112,7 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp) cap_mask &= ~HFI1_CAP_LOCKED_SMASK; cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT); - return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask); + return sysfs_emit(buffer, "0x%lx\n", cap_mask); } struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi) diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c index 1d77514ebbee0dad6c3162c8b86fc45d5763be46..0c0cef5b1e0e555afdf9e5dc577e5ade38009d09 100644 --- a/drivers/infiniband/hw/hfi1/firmware.c +++ b/drivers/infiniband/hw/hfi1/firmware.c @@ -1743,6 +1743,7 @@ int parse_platform_config(struct hfi1_devdata *dd) if (!dd->platform_config.data) { dd_dev_err(dd, "%s: Missing config file\n", __func__); + ret = -EINVAL; goto bail; } ptr = (u32 *)dd->platform_config.data; @@ -1751,6 +1752,7 @@ int parse_platform_config(struct hfi1_devdata *dd) ptr++; if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) { dd_dev_err(dd, "%s: Bad config file\n", __func__); + ret = -EINVAL; goto bail; } @@ -1774,6 +1776,7 @@ int parse_platform_config(struct hfi1_devdata *dd) if (file_length > dd->platform_config.size) { dd_dev_info(dd, "%s:File claims to be larger than read size\n", __func__); + ret = -EINVAL; goto bail; } else if (file_length < dd->platform_config.size) { dd_dev_info(dd, @@ -1794,6 +1797,7 @@ int parse_platform_config(struct hfi1_devdata *dd) dd_dev_err(dd, "%s: Failed validation at offset %ld\n", __func__, (ptr - (u32 *) dd->platform_config.data)); + ret = -EINVAL; goto bail; } @@ -1837,6 +1841,7 @@ int parse_platform_config(struct hfi1_devdata *dd) __func__, table_type, (ptr - (u32 *) dd->platform_config.data)); + ret = -EINVAL; goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table = ptr; @@ -1856,6 +1861,7 @@ int parse_platform_config(struct hfi1_devdata *dd) __func__, table_type, (ptr - (u32 *)dd->platform_config.data)); + ret = -EINVAL; goto bail; /* We don't trust this file now */ } pcfgcache->config_tables[table_type].table_metadata = diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c index 4146a2113a9540ea0e7435364c6ee5b7ef158beb..e5e783c45810b2a0135e7eeb977f609f939bcb26 100644 --- a/drivers/infiniband/hw/hfi1/mad.c +++ b/drivers/infiniband/hw/hfi1/mad.c @@ -2437,9 +2437,9 @@ struct opa_port_data_counters_msg { __be64 port_vl_xmit_wait_data; __be64 port_vl_rcv_bubble; __be64 port_vl_mark_fecn; - } vls[0]; + } vls[]; /* array size defined by #bits set in vl_select_mask*/ - } port[1]; /* array size defined by #ports in attribute modifier */ + } port; }; struct opa_port_error_counters64_msg { @@ -2470,9 +2470,9 @@ struct opa_port_error_counters64_msg { u8 reserved3[7]; struct _vls_ectrs { __be64 port_vl_xmit_discards; - } vls[0]; + } vls[]; /* array size defined by #bits set in vl_select_mask */ - } port[1]; /* array size defined by #ports in attribute modifier */ + } port; }; struct opa_port_error_info_msg { @@ -2543,7 +2543,7 @@ struct opa_port_error_info_msg { u8 error_info; } __packed fm_config_ei; __u32 reserved9; - } port[1]; /* actual array size defined by #ports in attr modifier */ + } port; }; /* opa_port_error_info_msg error_info_select_mask bit definitions */ @@ -2966,7 +2966,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, } /* Sanity check */ - response_data_size = struct_size(req, port[0].vls, num_vls); + response_data_size = struct_size(req, port.vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -2986,7 +2986,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - rsp = &req->port[0]; + rsp = &req->port; memset(rsp, 0, sizeof(*rsp)); rsp->port_number = port; @@ -3182,7 +3182,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - response_data_size = struct_size(req, port[0].vls, num_vls); + response_data_size = struct_size(req, port.vls, num_vls); if (response_data_size > sizeof(pmp->data)) { pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD; @@ -3201,7 +3201,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp, return reply((struct ib_mad_hdr *)pmp); } - rsp = &req->port[0]; + rsp = &req->port; ibp = to_iport(ibdev, port_num); ppd = ppd_from_ibp(ibp); @@ -3340,7 +3340,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp, u64 reg; req = (struct opa_port_error_info_msg *)pmp->data; - rsp = &req->port[0]; + rsp = &req->port; num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod)); num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); @@ -3590,7 +3590,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp, u32 error_info_select; req = (struct opa_port_error_info_msg *)pmp->data; - rsp = &req->port[0]; + rsp = &req->port; num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod)); num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3])); diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c index 3dfa5aff251256624541eba3926b5f009c0f5632..720d4c85c9c93fd09b87143e53e394270725fb8a 100644 --- a/drivers/infiniband/hw/hfi1/netdev_rx.c +++ b/drivers/infiniband/hw/hfi1/netdev_rx.c @@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx) * right now. */ set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state); - netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64); + netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi); rc = msix_netdev_request_rcd_irq(rxq->rcd); if (rc) goto bail_context_irq_failure; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 723e55a7de8d9c8b9d7c57df08c6fcb9d72a2001..f701cc86896b38379dd5fdc180f813d9916a4176 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -202,6 +202,7 @@ struct hns_roce_ucontext { struct list_head page_list; struct mutex page_mutex; struct hns_user_mmap_entry *db_mmap_entry; + u32 config; }; struct hns_roce_pd { @@ -334,6 +335,7 @@ struct hns_roce_wq { u32 head; u32 tail; void __iomem *db_reg; + u32 ext_sge_cnt; }; struct hns_roce_sge { @@ -635,6 +637,7 @@ struct hns_roce_qp { struct list_head rq_node; /* all recv qps are on a list */ struct list_head sq_node; /* all send qps are on a list */ struct hns_user_mmap_entry *dwqe_mmap_entry; + u32 config; }; struct hns_roce_ib_iboe { diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c index 1435fe2ea176f31c8796f17bf71398b6cab8742d..b2421883993b104ad1e48c5bdd01a501d66eccb9 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c @@ -192,7 +192,6 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, unsigned int *sge_idx, u32 msg_len) { struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev; - unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE; unsigned int left_len_in_pg; unsigned int idx = *sge_idx; unsigned int i = 0; @@ -200,7 +199,7 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp, void *addr; void *dseg; - if (msg_len > ext_sge_sz) { + if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) { ibdev_err(ibdev, "no enough extended sge space for inline data.\n"); return -EINVAL; @@ -1274,6 +1273,30 @@ static void update_cmdq_status(struct hns_roce_dev *hr_dev) hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR; } +static int hns_roce_cmd_err_convert_errno(u16 desc_ret) +{ + struct hns_roce_cmd_errcode errcode_table[] = { + {CMD_EXEC_SUCCESS, 0}, + {CMD_NO_AUTH, -EPERM}, + {CMD_NOT_EXIST, -EOPNOTSUPP}, + {CMD_CRQ_FULL, -EXFULL}, + {CMD_NEXT_ERR, -ENOSR}, + {CMD_NOT_EXEC, -ENOTBLK}, + {CMD_PARA_ERR, -EINVAL}, + {CMD_RESULT_ERR, -ERANGE}, + {CMD_TIMEOUT, -ETIME}, + {CMD_HILINK_ERR, -ENOLINK}, + {CMD_INFO_ILLEGAL, -ENXIO}, + {CMD_INVALID, -EBADR}, + }; + u16 i; + + for (i = 0; i < ARRAY_SIZE(errcode_table); i++) + if (desc_ret == errcode_table[i].return_status) + return errcode_table[i].errno; + return -EIO; +} + static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, struct hns_roce_cmq_desc *desc, int num) { @@ -1319,7 +1342,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev, dev_err_ratelimited(hr_dev->dev, "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n", desc->opcode, desc_ret); - ret = -EIO; + ret = hns_roce_cmd_err_convert_errno(desc_ret); } } else { /* FW/HW reset or incorrect number of desc */ @@ -2024,13 +2047,14 @@ static void set_default_caps(struct hns_roce_dev *hr_dev) caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW | HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR | - HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC; + HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL; caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM; if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) { caps->flags |= HNS_ROCE_CAP_FLAG_STASH | - HNS_ROCE_CAP_FLAG_DIRECT_WQE; + HNS_ROCE_CAP_FLAG_DIRECT_WQE | + HNS_ROCE_CAP_FLAG_XRC; caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE; } else { caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE; @@ -2342,6 +2366,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev) caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM); caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM); + if (!(caps->page_size_cap & PAGE_SIZE)) + caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED; + return 0; } @@ -2631,31 +2658,124 @@ static void free_dip_list(struct hns_roce_dev *hr_dev) spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags); } -static void free_mr_exit(struct hns_roce_dev *hr_dev) +static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_pd *hr_pd; + struct ib_pd *pd; + + hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hr_pd)) + return NULL; + pd = &hr_pd->ibpd; + pd->device = ibdev; + + if (hns_roce_alloc_pd(pd, NULL)) { + ibdev_err(ibdev, "failed to create pd for free mr.\n"); + kfree(hr_pd); + return NULL; + } + free_mr->rsv_pd = to_hr_pd(pd); + free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev; + free_mr->rsv_pd->ibpd.uobject = NULL; + free_mr->rsv_pd->ibpd.__internal_mr = NULL; + atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0); + + return pd; +} + +static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct ib_cq_init_attr cq_init_attr = {}; + struct hns_roce_cq *hr_cq; + struct ib_cq *cq; + + cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM; + + hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hr_cq)) + return NULL; + + cq = &hr_cq->ib_cq; + cq->device = ibdev; + + if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) { + ibdev_err(ibdev, "failed to create cq for free mr.\n"); + kfree(hr_cq); + return NULL; + } + free_mr->rsv_cq = to_hr_cq(cq); + free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev; + free_mr->rsv_cq->ib_cq.uobject = NULL; + free_mr->rsv_cq->ib_cq.comp_handler = NULL; + free_mr->rsv_cq->ib_cq.event_handler = NULL; + free_mr->rsv_cq->ib_cq.cq_context = NULL; + atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0); + + return cq; +} + +static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq, + struct ib_qp_init_attr *init_attr, int i) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_qp *hr_qp; + struct ib_qp *qp; int ret; + + hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL); + if (ZERO_OR_NULL_PTR(hr_qp)) + return -ENOMEM; + + qp = &hr_qp->ibqp; + qp->device = ibdev; + + ret = hns_roce_create_qp(qp, init_attr, NULL); + if (ret) { + ibdev_err(ibdev, "failed to create qp for free mr.\n"); + kfree(hr_qp); + return ret; + } + + free_mr->rsv_qp[i] = hr_qp; + free_mr->rsv_qp[i]->ibqp.recv_cq = cq; + free_mr->rsv_qp[i]->ibqp.send_cq = cq; + + return 0; +} + +static void free_mr_exit(struct hns_roce_dev *hr_dev) +{ + struct hns_roce_v2_priv *priv = hr_dev->priv; + struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; + struct ib_qp *qp; int i; for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { if (free_mr->rsv_qp[i]) { - ret = ib_destroy_qp(free_mr->rsv_qp[i]); - if (ret) - ibdev_err(&hr_dev->ib_dev, - "failed to destroy qp in free mr.\n"); - + qp = &free_mr->rsv_qp[i]->ibqp; + hns_roce_v2_destroy_qp(qp, NULL); + kfree(free_mr->rsv_qp[i]); free_mr->rsv_qp[i] = NULL; } } if (free_mr->rsv_cq) { - ib_destroy_cq(free_mr->rsv_cq); + hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL); + kfree(free_mr->rsv_cq); free_mr->rsv_cq = NULL; } if (free_mr->rsv_pd) { - ib_dealloc_pd(free_mr->rsv_pd); + hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL); + kfree(free_mr->rsv_pd); free_mr->rsv_pd = NULL; } } @@ -2664,55 +2784,46 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev) { struct hns_roce_v2_priv *priv = hr_dev->priv; struct hns_roce_v2_free_mr *free_mr = &priv->free_mr; - struct ib_device *ibdev = &hr_dev->ib_dev; - struct ib_cq_init_attr cq_init_attr = {}; struct ib_qp_init_attr qp_init_attr = {}; struct ib_pd *pd; struct ib_cq *cq; - struct ib_qp *qp; int ret; int i; - pd = ib_alloc_pd(ibdev, 0); - if (IS_ERR(pd)) { - ibdev_err(ibdev, "failed to create pd for free mr.\n"); - return PTR_ERR(pd); - } - free_mr->rsv_pd = pd; + pd = free_mr_init_pd(hr_dev); + if (!pd) + return -ENOMEM; - cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM; - cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr); - if (IS_ERR(cq)) { - ibdev_err(ibdev, "failed to create cq for free mr.\n"); - ret = PTR_ERR(cq); - goto create_failed; + cq = free_mr_init_cq(hr_dev); + if (!cq) { + ret = -ENOMEM; + goto create_failed_cq; } - free_mr->rsv_cq = cq; qp_init_attr.qp_type = IB_QPT_RC; qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; - qp_init_attr.send_cq = free_mr->rsv_cq; - qp_init_attr.recv_cq = free_mr->rsv_cq; + qp_init_attr.send_cq = cq; + qp_init_attr.recv_cq = cq; for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM; qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM; qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM; qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM; - qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr); - if (IS_ERR(qp)) { - ibdev_err(ibdev, "failed to create qp for free mr.\n"); - ret = PTR_ERR(qp); - goto create_failed; - } - - free_mr->rsv_qp[i] = qp; + ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i); + if (ret) + goto create_failed_qp; } return 0; -create_failed: - free_mr_exit(hr_dev); +create_failed_qp: + hns_roce_destroy_cq(cq, NULL); + kfree(cq); + +create_failed_cq: + hns_roce_dealloc_pd(pd, NULL); + kfree(pd); return ret; } @@ -2728,14 +2839,17 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, int mask; int ret; - hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]); + hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp); hr_qp->free_mr_en = 1; + hr_qp->ibqp.device = ibdev; + hr_qp->ibqp.qp_type = IB_QPT_RC; mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS; attr->qp_state = IB_QPS_INIT; attr->port_num = 1; attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE; - ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT, + IB_QPS_INIT); if (ret) { ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n", ret); @@ -2756,7 +2870,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num); - ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT, + IB_QPS_RTR); hr_dev->loop_idc = loopback; if (ret) { ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n", @@ -2770,7 +2885,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev, attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN; attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT; attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT; - ret = ib_modify_qp(&hr_qp->ibqp, attr, mask); + ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR, + IB_QPS_RTS); if (ret) ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n", ret); @@ -3186,7 +3302,8 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev, int i, count; count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages, - ARRAY_SIZE(pages), &pbl_ba); + min_t(int, ARRAY_SIZE(pages), mr->npages), + &pbl_ba); if (count < 1) { ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n", count); @@ -3414,7 +3531,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) mutex_lock(&free_mr->mutex); for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) { - hr_qp = to_hr_qp(free_mr->rsv_qp[i]); + hr_qp = free_mr->rsv_qp[i]; ret = free_mr_post_send_lp_wqe(hr_qp); if (ret) { @@ -3429,7 +3546,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev) end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies; while (cqe_cnt) { - npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc); + npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc); if (npolled < 0) { ibdev_err(ibdev, "failed to poll cqe for free mr, remain %d cqe.\n", @@ -5375,6 +5492,8 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, rdma_ah_set_sl(&qp_attr->ah_attr, hr_reg_read(&context, QPC_SL)); + rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1); + rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH); grh->flow_label = hr_reg_read(&context, QPC_FL); grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX); grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT); @@ -5468,7 +5587,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev, return ret; } -static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device); struct hns_roce_qp *hr_qp = to_hr_qp(ibqp); diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h index c7bf2d52c1cdb254ffe3dbbacfcf061944679909..b1b3e1e0b84e55d8774285e75a8760204e8cad6a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h +++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h @@ -272,6 +272,11 @@ enum hns_roce_cmd_return_status { CMD_OTHER_ERR = 0xff }; +struct hns_roce_cmd_errcode { + enum hns_roce_cmd_return_status return_status; + int errno; +}; + enum hns_roce_sgid_type { GID_TYPE_FLAG_ROCE_V1 = 0, GID_TYPE_FLAG_ROCE_V2_IPV4, @@ -1327,9 +1332,9 @@ struct hns_roce_link_table { #define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2) struct hns_roce_v2_free_mr { - struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM]; - struct ib_cq *rsv_cq; - struct ib_pd *rsv_pd; + struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM]; + struct hns_roce_cq *rsv_cq; + struct hns_roce_pd *rsv_pd; struct mutex mutex; }; @@ -1459,6 +1464,8 @@ struct hns_roce_sccc_clr_done { __le32 rsv[5]; }; +int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); + static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2], void __iomem *dest) { diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index dcf89689a4c628d908af905131b01f54716caf37..8ba68ac12388de16e4fff6825a5965a79a8ff5b7 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -354,10 +354,11 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx) static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { - int ret; struct hns_roce_ucontext *context = to_hr_ucontext(uctx); - struct hns_roce_ib_alloc_ucontext_resp resp = {}; struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device); + struct hns_roce_ib_alloc_ucontext_resp resp = {}; + struct hns_roce_ib_alloc_ucontext ucmd = {}; + int ret; if (!hr_dev->active) return -EAGAIN; @@ -365,6 +366,19 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx, resp.qp_tab_size = hr_dev->caps.num_qps; resp.srq_tab_size = hr_dev->caps.num_srqs; + ret = ib_copy_from_udata(&ucmd, udata, + min(udata->inlen, sizeof(ucmd))); + if (ret) + return ret; + + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS; + + if (context->config & HNS_ROCE_EXSGE_FLAGS) { + resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS; + resp.max_inline_data = hr_dev->caps.max_sq_inline; + } + ret = hns_roce_uar_alloc(hr_dev, &context->uar); if (ret) goto error_fail_uar_alloc; diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c index 845ac7d3831f420d26f446ed7e36dca5f6201a03..37a5cf62f88b48b12293946e311bdd5d067edbaf 100644 --- a/drivers/infiniband/hw/hns/hns_roce_mr.c +++ b/drivers/infiniband/hw/hns/hns_roce_mr.c @@ -392,10 +392,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, return &mr->ibmr; -err_key: - free_mr_key(hr_dev, mr); err_pbl: free_mr_pbl(hr_dev, mr); +err_key: + free_mr_key(hr_dev, mr); err_free: kfree(mr); return ERR_PTR(ret); diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c index f0bd82a18069a69054251d9be85c57d4ba21e683..0ae335fb205cadb4df9a99d8ebfaa9fe1d7dbe20 100644 --- a/drivers/infiniband/hw/hns/hns_roce_qp.c +++ b/drivers/infiniband/hw/hns/hns_roce_qp.c @@ -476,38 +476,109 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap, return 0; } -static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp) +static u32 get_max_inline_data(struct hns_roce_dev *hr_dev, + struct ib_qp_cap *cap) { - /* GSI/UD QP only has extended sge */ - if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD) - return qp->sq.max_gs; - - if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE) - return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE; + if (cap->max_inline_data) { + cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data); + return min(cap->max_inline_data, + hr_dev->caps.max_sq_inline); + } return 0; } +static void update_inline_data(struct hns_roce_qp *hr_qp, + struct ib_qp_cap *cap) +{ + u32 sge_num = hr_qp->sq.ext_sge_cnt; + + if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) { + if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI || + hr_qp->ibqp.qp_type == IB_QPT_UD)) + sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num); + + cap->max_inline_data = max(cap->max_inline_data, + sge_num * HNS_ROCE_SGE_SIZE); + } + + hr_qp->max_inline_data = cap->max_inline_data; +} + +static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi, + u32 max_send_sge) +{ + unsigned int std_sge_num; + unsigned int min_sge; + + std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE; + min_sge = is_ud_or_gsi ? 1 : 0; + return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) : + min_sge; +} + +static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi, + u32 max_inline_data) +{ + unsigned int inline_sge; + + inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE; + + /* + * if max_inline_data less than + * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE, + * In addition to ud's mode, no need to extend sge. + */ + if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE) + inline_sge = 0; + + return inline_sge; +} + static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt, struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap) { + bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI || + hr_qp->ibqp.qp_type == IB_QPT_UD); + unsigned int std_sge_num; + u32 inline_ext_sge = 0; + u32 ext_wqe_sge_cnt; u32 total_sge_cnt; - u32 wqe_sge_cnt; + + cap->max_inline_data = get_max_inline_data(hr_dev, cap); hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT; + std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE; + ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi, + cap->max_send_sge); - hr_qp->sq.max_gs = max(1U, cap->max_send_sge); + if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) { + inline_ext_sge = max(ext_wqe_sge_cnt, + get_sge_num_from_max_inl_data(is_ud_or_gsi, + cap->max_inline_data)); + hr_qp->sq.ext_sge_cnt = inline_ext_sge ? + roundup_pow_of_two(inline_ext_sge) : 0; - wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp); + hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num)); + hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg); + + ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt; + } else { + hr_qp->sq.max_gs = max(1U, cap->max_send_sge); + hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg); + hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs; + } /* If the number of extended sge is not zero, they MUST use the * space of HNS_HW_PAGE_SIZE at least. */ - if (wqe_sge_cnt) { - total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt); + if (ext_wqe_sge_cnt) { + total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt); hr_qp->sge.sge_cnt = max(total_sge_cnt, (u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE); } + + update_inline_data(hr_qp, cap); } static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev, @@ -556,6 +627,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev, hr_qp->sq.wqe_shift = ucmd->log_sq_stride; hr_qp->sq.wqe_cnt = cnt; + cap->max_send_sge = hr_qp->sq.max_gs; return 0; } @@ -986,13 +1058,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, struct hns_roce_ib_create_qp *ucmd) { struct ib_device *ibdev = &hr_dev->ib_dev; + struct hns_roce_ucontext *uctx; int ret; - if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline) - init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline; - - hr_qp->max_inline_data = init_attr->cap.max_inline_data; - if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR; else @@ -1015,12 +1083,17 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp, return ret; } + uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext, + ibucontext); + hr_qp->config = uctx->config; ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd); if (ret) ibdev_err(ibdev, "failed to set user SQ size, ret = %d.\n", ret); } else { + if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) + hr_qp->config = HNS_ROCE_EXSGE_FLAGS; ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp); if (ret) ibdev_err(ibdev, diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c index a6e5d350a94ce8f1e3499fe736bd56cb4abf1fce..16183e894da77b1742291591e15321c56405b06e 100644 --- a/drivers/infiniband/hw/irdma/uk.c +++ b/drivers/infiniband/hw/irdma/uk.c @@ -566,21 +566,37 @@ static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe, /** * irdma_copy_inline_data_gen_1 - Copy inline data to wqe - * @dest: pointer to wqe - * @src: pointer to inline data - * @len: length of inline data to copy + * @wqe: pointer to wqe + * @sge_list: table of pointers to inline data + * @num_sges: Total inline data length * @polarity: compatibility parameter */ -static void irdma_copy_inline_data_gen_1(u8 *dest, u8 *src, u32 len, - u8 polarity) +static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list, + u32 num_sges, u8 polarity) { - if (len <= 16) { - memcpy(dest, src, len); - } else { - memcpy(dest, src, 16); - src += 16; - dest = dest + 32; - memcpy(dest, src, len - 16); + u32 quanta_bytes_remaining = 16; + int i; + + for (i = 0; i < num_sges; i++) { + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; + u32 sge_len = sge_list[i].length; + + while (sge_len) { + u32 bytes_copied; + + bytes_copied = min(sge_len, quanta_bytes_remaining); + memcpy(wqe, cur_sge, bytes_copied); + wqe += bytes_copied; + cur_sge += bytes_copied; + quanta_bytes_remaining -= bytes_copied; + sge_len -= bytes_copied; + + if (!quanta_bytes_remaining) { + /* Remaining inline bytes reside after hdr */ + wqe += 16; + quanta_bytes_remaining = 32; + } + } } } @@ -612,35 +628,51 @@ static void irdma_set_mw_bind_wqe(__le64 *wqe, /** * irdma_copy_inline_data - Copy inline data to wqe - * @dest: pointer to wqe - * @src: pointer to inline data - * @len: length of inline data to copy + * @wqe: pointer to wqe + * @sge_list: table of pointers to inline data + * @num_sges: number of SGE's * @polarity: polarity of wqe valid bit */ -static void irdma_copy_inline_data(u8 *dest, u8 *src, u32 len, u8 polarity) +static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list, + u32 num_sges, u8 polarity) { u8 inline_valid = polarity << IRDMA_INLINE_VALID_S; - u32 copy_size; - - dest += 8; - if (len <= 8) { - memcpy(dest, src, len); - return; - } - - *((u64 *)dest) = *((u64 *)src); - len -= 8; - src += 8; - dest += 24; /* point to additional 32 byte quanta */ - - while (len) { - copy_size = len < 31 ? len : 31; - memcpy(dest, src, copy_size); - *(dest + 31) = inline_valid; - len -= copy_size; - dest += 32; - src += copy_size; + u32 quanta_bytes_remaining = 8; + bool first_quanta = true; + int i; + + wqe += 8; + + for (i = 0; i < num_sges; i++) { + u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr; + u32 sge_len = sge_list[i].length; + + while (sge_len) { + u32 bytes_copied; + + bytes_copied = min(sge_len, quanta_bytes_remaining); + memcpy(wqe, cur_sge, bytes_copied); + wqe += bytes_copied; + cur_sge += bytes_copied; + quanta_bytes_remaining -= bytes_copied; + sge_len -= bytes_copied; + + if (!quanta_bytes_remaining) { + quanta_bytes_remaining = 31; + + /* Remaining inline bytes reside after hdr */ + if (first_quanta) { + first_quanta = false; + wqe += 16; + } else { + *wqe = inline_valid; + wqe++; + } + } + } } + if (!first_quanta && quanta_bytes_remaining < 31) + *(wqe + quanta_bytes_remaining) = inline_valid; } /** @@ -679,20 +711,27 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; - struct irdma_inline_rdma_write *op_info; + struct irdma_rdma_write *op_info; u64 hdr = 0; u32 wqe_idx; bool read_fence = false; + u32 i, total_size = 0; u16 quanta; info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.inline_rdma_write; + op_info = &info->op.rdma_write; + + if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges)) + return -EINVAL; + + for (i = 0; i < op_info->num_lo_sges; i++) + total_size += op_info->lo_sg_list[i].length; - if (op_info->len > qp->max_inline_data) + if (unlikely(total_size > qp->max_inline_data)) return -EINVAL; - quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) return -ENOMEM; @@ -705,7 +744,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | - FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) | FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) | @@ -719,7 +758,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp, set_64bit_val(wqe, 0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); - qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len, + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list, + op_info->num_lo_sges, qp->swqe_polarity); dma_wmb(); /* make sure WQE is populated before valid bit is set */ @@ -745,20 +785,27 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, struct irdma_post_sq_info *info, bool post_sq) { __le64 *wqe; - struct irdma_post_inline_send *op_info; + struct irdma_post_send *op_info; u64 hdr; u32 wqe_idx; bool read_fence = false; + u32 i, total_size = 0; u16 quanta; info->push_wqe = qp->push_db ? true : false; - op_info = &info->op.inline_send; + op_info = &info->op.send; + + if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges)) + return -EINVAL; - if (op_info->len > qp->max_inline_data) + for (i = 0; i < op_info->num_sges; i++) + total_size += op_info->sg_list[i].length; + + if (unlikely(total_size > qp->max_inline_data)) return -EINVAL; - quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len); - wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len, + quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size); + wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size, info); if (!wqe) return -ENOMEM; @@ -773,7 +820,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) | FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) | FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) | - FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) | + FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) | FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, (info->imm_data_valid ? 1 : 0)) | FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) | @@ -789,8 +836,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp, if (info->imm_data_valid) set_64bit_val(wqe, 0, FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data)); - qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len, - qp->swqe_polarity); + qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list, + op_info->num_sges, qp->swqe_polarity); dma_wmb(); /* make sure WQE is populated before valid bit is set */ @@ -1002,11 +1049,10 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, __le64 *cqe; struct irdma_qp_uk *qp; struct irdma_ring *pring = NULL; - u32 wqe_idx, q_type; + u32 wqe_idx; int ret_code; bool move_cq_head = true; u8 polarity; - u8 op_type; bool ext_valid; __le64 *ext_cqe; @@ -1074,7 +1120,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->ud_vlan_valid = false; } - q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); + info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3); info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3); info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3); @@ -1113,8 +1159,9 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, } wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3); info->qp_handle = (irdma_qp_handle)(unsigned long)qp; + info->op_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3); - if (q_type == IRDMA_CQE_QTYPE_RQ) { + if (info->q_type == IRDMA_CQE_QTYPE_RQ) { u32 array_idx; array_idx = wqe_idx / qp->rq_wqe_size_multiplier; @@ -1134,10 +1181,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0); - if (info->imm_valid) - info->op_type = IRDMA_OP_TYPE_REC_IMM; - else - info->op_type = IRDMA_OP_TYPE_REC; if (qword3 & IRDMACQ_STAG) { info->stag_invalid_set = true; info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2); @@ -1195,17 +1238,18 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq, sw_wqe = qp->sq_base[tail].elem; get_64bit_val(sw_wqe, 24, &wqe_qword); - op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword); - info->op_type = op_type; + info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, + wqe_qword); IRDMA_RING_SET_TAIL(qp->sq_ring, tail + qp->sq_wrtrk_array[tail].quanta); - if (op_type != IRDMAQP_OP_NOP) { + if (info->op_type != IRDMAQP_OP_NOP) { info->wr_id = qp->sq_wrtrk_array[tail].wrid; info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len; break; } } while (1); - if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR) + if (info->op_type == IRDMA_OP_TYPE_BIND_MW && + info->minor_err == FLUSH_PROT_ERR) info->minor_err = FLUSH_MW_BIND_ERR; qp->sq_flush_seen = true; if (!IRDMA_RING_MORE_WORK(qp->sq_ring)) diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h index 2ef61923c92685a5164008eceff7fcb246b664df..d0cdf609f5e06aa71c24a5471b63b8f8aab2ba36 100644 --- a/drivers/infiniband/hw/irdma/user.h +++ b/drivers/infiniband/hw/irdma/user.h @@ -173,14 +173,6 @@ struct irdma_post_send { u32 ah_id; }; -struct irdma_post_inline_send { - void *data; - u32 len; - u32 qkey; - u32 dest_qp; - u32 ah_id; -}; - struct irdma_post_rq_info { u64 wr_id; struct ib_sge *sg_list; @@ -193,12 +185,6 @@ struct irdma_rdma_write { struct ib_sge rem_addr; }; -struct irdma_inline_rdma_write { - void *data; - u32 len; - struct ib_sge rem_addr; -}; - struct irdma_rdma_read { struct ib_sge *lo_sg_list; u32 num_lo_sges; @@ -241,8 +227,6 @@ struct irdma_post_sq_info { struct irdma_rdma_read rdma_read; struct irdma_bind_window bind_window; struct irdma_inv_local_stag inv_local_stag; - struct irdma_inline_rdma_write inline_rdma_write; - struct irdma_post_inline_send inline_send; } op; }; @@ -261,6 +245,7 @@ struct irdma_cq_poll_info { u16 ud_vlan; u8 ud_smac[6]; u8 op_type; + u8 q_type; bool stag_invalid_set:1; /* or L_R_Key set */ bool push_dropped:1; bool error:1; @@ -291,7 +276,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp, bool post_sq); struct irdma_wqe_uk_ops { - void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity); + void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list, + u32 num_sges, u8 polarity); u16 (*iw_inline_data_size_to_quanta)(u32 data_size); void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge, u8 valid); diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c index 8dfc9e154d733aba6dbfc47b82276dfdfe710c75..445e69e864097ef85adbf351387eccd08150ffcb 100644 --- a/drivers/infiniband/hw/irdma/utils.c +++ b/drivers/infiniband/hw/irdma/utils.c @@ -2591,6 +2591,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp) sw_wqe = qp->sq_base[wqe_idx].elem; get_64bit_val(sw_wqe, 24, &wqe_qword); cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE); + cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ; /* remove the SQ WR by moving SQ tail*/ IRDMA_RING_SET_TAIL(*sq_ring, sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta); @@ -2629,6 +2630,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp) cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx]; cmpl->cpi.op_type = IRDMA_OP_TYPE_REC; + cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ; /* remove the RQ WR by moving RQ tail */ IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1); ibdev_dbg(iwqp->iwrcq->ibcq.device, diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c index a22afbb25bc58d3870e15f09cddbd3dc4880052a..f6973ea55eda7eefc518231b997d3a9811286bee 100644 --- a/drivers/infiniband/hw/irdma/verbs.c +++ b/drivers/infiniband/hw/irdma/verbs.c @@ -63,36 +63,6 @@ static int irdma_query_device(struct ib_device *ibdev, return 0; } -/** - * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed - * @link_speed: netdev phy link speed - * @active_speed: IB port speed - * @active_width: IB port width - */ -static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed, - u8 *active_width) -{ - if (link_speed <= SPEED_1000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_SDR; - } else if (link_speed <= SPEED_10000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_FDR10; - } else if (link_speed <= SPEED_20000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_DDR; - } else if (link_speed <= SPEED_25000) { - *active_width = IB_WIDTH_1X; - *active_speed = IB_SPEED_EDR; - } else if (link_speed <= SPEED_40000) { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_FDR10; - } else { - *active_width = IB_WIDTH_4X; - *active_speed = IB_SPEED_EDR; - } -} - /** * irdma_query_port - get port attributes * @ibdev: device pointer from stack @@ -120,8 +90,9 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port, props->state = IB_PORT_DOWN; props->phys_state = IB_PORT_PHYS_STATE_DISABLED; } - irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed, - &props->active_width); + + ib_get_eth_speed(ibdev, port, &props->active_speed, + &props->active_width); if (rdma_protocol_roce(ibdev, 1)) { props->gid_tbl_len = 32; @@ -1242,6 +1213,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr, av->attrs = attr->ah_attr; rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid); rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid); + av->net_type = rdma_gid_attr_network_type(sgid_attr); if (av->net_type == RDMA_NETWORK_IPV6) { __be32 *daddr = av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32; @@ -2358,9 +2330,10 @@ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc, * @rf: RDMA PCI function * @iwmr: mr pointer for this memory registration * @use_pbles: flag if to use pble's + * @lvl_1_only: request only level 1 pble if true */ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, - bool use_pbles) + bool use_pbles, bool lvl_1_only) { struct irdma_pbl *iwpbl = &iwmr->iwpbl; struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc; @@ -2371,7 +2344,7 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr, if (use_pbles) { status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt, - false); + lvl_1_only); if (status) return status; @@ -2414,16 +2387,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev, bool ret = true; pg_size = iwmr->page_size; - err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles); + err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, true); if (err) return err; - if (use_pbles && palloc->level != PBLE_LEVEL_1) { - irdma_free_pble(iwdev->rf->pble_rsrc, palloc); - iwpbl->pbl_allocated = false; - return -ENOMEM; - } - if (use_pbles) arr = palloc->level1.addr; @@ -2899,7 +2866,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len, case IRDMA_MEMREG_TYPE_MEM: use_pbles = (iwmr->page_cnt != 1); - err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles); + err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false); if (err) goto error; @@ -3165,30 +3132,20 @@ static int irdma_post_send(struct ib_qp *ibqp, info.stag_to_inv = ib_wr->ex.invalidate_rkey; } - if (ib_wr->send_flags & IB_SEND_INLINE) { - info.op.inline_send.data = (void *)(unsigned long) - ib_wr->sg_list[0].addr; - info.op.inline_send.len = ib_wr->sg_list[0].length; - if (iwqp->ibqp.qp_type == IB_QPT_UD || - iwqp->ibqp.qp_type == IB_QPT_GSI) { - ah = to_iwah(ud_wr(ib_wr)->ah); - info.op.inline_send.ah_id = ah->sc_ah.ah_info.ah_idx; - info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey; - info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn; - } + info.op.send.num_sges = ib_wr->num_sge; + info.op.send.sg_list = ib_wr->sg_list; + if (iwqp->ibqp.qp_type == IB_QPT_UD || + iwqp->ibqp.qp_type == IB_QPT_GSI) { + ah = to_iwah(ud_wr(ib_wr)->ah); + info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx; + info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; + info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; + } + + if (ib_wr->send_flags & IB_SEND_INLINE) err = irdma_uk_inline_send(ukqp, &info, false); - } else { - info.op.send.num_sges = ib_wr->num_sge; - info.op.send.sg_list = ib_wr->sg_list; - if (iwqp->ibqp.qp_type == IB_QPT_UD || - iwqp->ibqp.qp_type == IB_QPT_GSI) { - ah = to_iwah(ud_wr(ib_wr)->ah); - info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx; - info.op.send.qkey = ud_wr(ib_wr)->remote_qkey; - info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn; - } + else err = irdma_uk_send(ukqp, &info, false); - } break; case IB_WR_RDMA_WRITE_WITH_IMM: if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) { @@ -3205,22 +3162,15 @@ static int irdma_post_send(struct ib_qp *ibqp, else info.op_type = IRDMA_OP_TYPE_RDMA_WRITE; - if (ib_wr->send_flags & IB_SEND_INLINE) { - info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr; - info.op.inline_rdma_write.len = - ib_wr->sg_list[0].length; - info.op.inline_rdma_write.rem_addr.addr = - rdma_wr(ib_wr)->remote_addr; - info.op.inline_rdma_write.rem_addr.lkey = - rdma_wr(ib_wr)->rkey; + info.op.rdma_write.num_lo_sges = ib_wr->num_sge; + info.op.rdma_write.lo_sg_list = ib_wr->sg_list; + info.op.rdma_write.rem_addr.addr = + rdma_wr(ib_wr)->remote_addr; + info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; + if (ib_wr->send_flags & IB_SEND_INLINE) err = irdma_uk_inline_rdma_write(ukqp, &info, false); - } else { - info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list; - info.op.rdma_write.num_lo_sges = ib_wr->num_sge; - info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr; - info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey; + else err = irdma_uk_rdma_write(ukqp, &info, false); - } break; case IB_WR_RDMA_READ_WITH_INV: inv_stag = true; @@ -3380,7 +3330,6 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode static void irdma_process_cqe(struct ib_wc *entry, struct irdma_cq_poll_info *cq_poll_info) { - struct irdma_qp *iwqp; struct irdma_sc_qp *qp; entry->wc_flags = 0; @@ -3388,7 +3337,6 @@ static void irdma_process_cqe(struct ib_wc *entry, entry->wr_id = cq_poll_info->wr_id; qp = cq_poll_info->qp_handle; - iwqp = qp->qp_uk.back_qp; entry->qp = qp->qp_uk.back_qp; if (cq_poll_info->error) { @@ -3421,42 +3369,17 @@ static void irdma_process_cqe(struct ib_wc *entry, } } - switch (cq_poll_info->op_type) { - case IRDMA_OP_TYPE_RDMA_WRITE: - case IRDMA_OP_TYPE_RDMA_WRITE_SOL: - entry->opcode = IB_WC_RDMA_WRITE; - break; - case IRDMA_OP_TYPE_RDMA_READ_INV_STAG: - case IRDMA_OP_TYPE_RDMA_READ: - entry->opcode = IB_WC_RDMA_READ; - break; - case IRDMA_OP_TYPE_SEND_INV: - case IRDMA_OP_TYPE_SEND_SOL: - case IRDMA_OP_TYPE_SEND_SOL_INV: - case IRDMA_OP_TYPE_SEND: - entry->opcode = IB_WC_SEND; - break; - case IRDMA_OP_TYPE_FAST_REG_NSMR: - entry->opcode = IB_WC_REG_MR; - break; - case IRDMA_OP_TYPE_INV_STAG: - entry->opcode = IB_WC_LOCAL_INV; - break; - case IRDMA_OP_TYPE_REC_IMM: - case IRDMA_OP_TYPE_REC: - entry->opcode = cq_poll_info->op_type == IRDMA_OP_TYPE_REC_IMM ? - IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV; + if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) { + set_ib_wc_op_sq(cq_poll_info, entry); + } else { + set_ib_wc_op_rq(cq_poll_info, entry, + qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ? + true : false); if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD && cq_poll_info->stag_invalid_set) { entry->ex.invalidate_rkey = cq_poll_info->inv_stag; entry->wc_flags |= IB_WC_WITH_INVALIDATE; } - break; - default: - ibdev_err(&iwqp->iwdev->ibdev, - "Invalid opcode = %d in CQE\n", cq_poll_info->op_type); - entry->status = IB_WC_GENERAL_ERR; - return; } if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) { diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h index 4309b7159f42c8c634cb1e4293c3d5e5f64985c7..a536e9fa85ebf1b84a5865401e787dc3278b9e8e 100644 --- a/drivers/infiniband/hw/irdma/verbs.h +++ b/drivers/infiniband/hw/irdma/verbs.h @@ -232,6 +232,59 @@ static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev) return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]); } +static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info, + struct ib_wc *entry) +{ + switch (cq_poll_info->op_type) { + case IRDMA_OP_TYPE_RDMA_WRITE: + case IRDMA_OP_TYPE_RDMA_WRITE_SOL: + entry->opcode = IB_WC_RDMA_WRITE; + break; + case IRDMA_OP_TYPE_RDMA_READ_INV_STAG: + case IRDMA_OP_TYPE_RDMA_READ: + entry->opcode = IB_WC_RDMA_READ; + break; + case IRDMA_OP_TYPE_SEND_SOL: + case IRDMA_OP_TYPE_SEND_SOL_INV: + case IRDMA_OP_TYPE_SEND_INV: + case IRDMA_OP_TYPE_SEND: + entry->opcode = IB_WC_SEND; + break; + case IRDMA_OP_TYPE_FAST_REG_NSMR: + entry->opcode = IB_WC_REG_MR; + break; + case IRDMA_OP_TYPE_INV_STAG: + entry->opcode = IB_WC_LOCAL_INV; + break; + default: + entry->status = IB_WC_GENERAL_ERR; + } +} + +static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info, + struct ib_wc *entry, bool send_imm_support) +{ + /** + * iWARP does not support sendImm, so the presence of Imm data + * must be WriteImm. + */ + if (!send_imm_support) { + entry->opcode = cq_poll_info->imm_valid ? + IB_WC_RECV_RDMA_WITH_IMM : + IB_WC_RECV; + return; + } + + switch (cq_poll_info->op_type) { + case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE: + case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE: + entry->opcode = IB_WC_RECV_RDMA_WITH_IMM; + break; + default: + entry->opcode = IB_WC_RECV; + } +} + void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4); int irdma_ib_register_device(struct irdma_device *iwdev); void irdma_ib_unregister_device(struct irdma_device *iwdev); diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..546640657bac28370d53baa73fb14b0039a66815 --- /dev/null +++ b/drivers/infiniband/hw/mana/Kconfig @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +config MANA_INFINIBAND + tristate "Microsoft Azure Network Adapter support" + depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA + help + This driver provides low-level RDMA support for Microsoft Azure + Network Adapter (MANA). MANA supports RDMA features that can be used + for workloads (e.g. DPDK, MPI etc) that uses RDMA verbs to directly + access hardware from user-mode processes in Microsoft Azure cloud + environment. diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..88655fe5e398a8d18a7e61cfe4012a5ca674c49c --- /dev/null +++ b/drivers/infiniband/hw/mana/Makefile @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o + +mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c new file mode 100644 index 0000000000000000000000000000000000000000..d141cab8a1e6986ac24a4dc51ea1b8050af078a1 --- /dev/null +++ b/drivers/infiniband/hw/mana/cq.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct ib_device *ibdev = ibcq->device; + struct mana_ib_create_cq ucmd = {}; + struct mana_ib_dev *mdev; + int err; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + if (udata->inlen < sizeof(ucmd)) + return -EINVAL; + + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(ibdev, + "Failed to copy from udata for create cq, %d\n", err); + return err; + } + + if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) { + ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe); + return -EINVAL; + } + + cq->cqe = attr->cqe; + cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(cq->umem)) { + err = PTR_ERR(cq->umem); + ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n", + err); + return err; + } + + err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region); + if (err) { + ibdev_dbg(ibdev, + "Failed to create dma region for create cq, %d\n", + err); + goto err_release_umem; + } + + ibdev_dbg(ibdev, + "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", + err, cq->gdma_region); + + /* + * The CQ ID is not known at this time. The ID is generated at create_qp + */ + + return 0; + +err_release_umem: + ib_umem_release(cq->umem); + return err; +} + +int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) +{ + struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq); + struct ib_device *ibdev = ibcq->device; + struct mana_ib_dev *mdev; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region); + ib_umem_release(cq->umem); + + return 0; +} diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c new file mode 100644 index 0000000000000000000000000000000000000000..d4541b8707e4c75a191a09dd2890447b45895ed1 --- /dev/null +++ b/drivers/infiniband/hw/mana/device.c @@ -0,0 +1,117 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" +#include + +MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver"); +MODULE_LICENSE("GPL"); +MODULE_IMPORT_NS(NET_MANA); + +static const struct ib_device_ops mana_ib_dev_ops = { + .owner = THIS_MODULE, + .driver_id = RDMA_DRIVER_MANA, + .uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION, + + .alloc_pd = mana_ib_alloc_pd, + .alloc_ucontext = mana_ib_alloc_ucontext, + .create_cq = mana_ib_create_cq, + .create_qp = mana_ib_create_qp, + .create_rwq_ind_table = mana_ib_create_rwq_ind_table, + .create_wq = mana_ib_create_wq, + .dealloc_pd = mana_ib_dealloc_pd, + .dealloc_ucontext = mana_ib_dealloc_ucontext, + .dereg_mr = mana_ib_dereg_mr, + .destroy_cq = mana_ib_destroy_cq, + .destroy_qp = mana_ib_destroy_qp, + .destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table, + .destroy_wq = mana_ib_destroy_wq, + .disassociate_ucontext = mana_ib_disassociate_ucontext, + .get_port_immutable = mana_ib_get_port_immutable, + .mmap = mana_ib_mmap, + .modify_qp = mana_ib_modify_qp, + .modify_wq = mana_ib_modify_wq, + .query_device = mana_ib_query_device, + .query_gid = mana_ib_query_gid, + .query_port = mana_ib_query_port, + .reg_user_mr = mana_ib_reg_user_mr, + + INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq), + INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd), + INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp), + INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext), + INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table, + ib_ind_table), +}; + +static int mana_ib_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mana_adev *madev = container_of(adev, struct mana_adev, adev); + struct gdma_dev *mdev = madev->mdev; + struct mana_context *mc; + struct mana_ib_dev *dev; + int ret; + + mc = mdev->driver_data; + + dev = ib_alloc_device(mana_ib_dev, ib_dev); + if (!dev) + return -ENOMEM; + + ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops); + + dev->ib_dev.phys_port_cnt = mc->num_ports; + + ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev, + mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt); + + dev->gdma_dev = mdev; + dev->ib_dev.node_type = RDMA_NODE_IB_CA; + + /* + * num_comp_vectors needs to set to the max MSIX index + * when interrupts and event queues are implemented + */ + dev->ib_dev.num_comp_vectors = 1; + dev->ib_dev.dev.parent = mdev->gdma_context->dev; + + ret = ib_register_device(&dev->ib_dev, "mana_%d", + mdev->gdma_context->dev); + if (ret) { + ib_dealloc_device(&dev->ib_dev); + return ret; + } + + dev_set_drvdata(&adev->dev, dev); + + return 0; +} + +static void mana_ib_remove(struct auxiliary_device *adev) +{ + struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev); + + ib_unregister_device(&dev->ib_dev); + ib_dealloc_device(&dev->ib_dev); +} + +static const struct auxiliary_device_id mana_id_table[] = { + { + .name = "mana.rdma", + }, + {}, +}; + +MODULE_DEVICE_TABLE(auxiliary, mana_id_table); + +static struct auxiliary_driver mana_driver = { + .name = "rdma", + .probe = mana_ib_probe, + .remove = mana_ib_remove, + .id_table = mana_id_table, +}; + +module_auxiliary_driver(mana_driver); diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c new file mode 100644 index 0000000000000000000000000000000000000000..8b3bc302d6f3a3cd7984e7653e1636de98978dd2 --- /dev/null +++ b/drivers/infiniband/hw/mana/main.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, + u32 port) +{ + struct gdma_dev *gd = dev->gdma_dev; + struct mana_port_context *mpc; + struct net_device *ndev; + struct mana_context *mc; + + mc = gd->driver_data; + ndev = mc->ports[port]; + mpc = netdev_priv(ndev); + + mutex_lock(&pd->vport_mutex); + + pd->vport_use_count--; + WARN_ON(pd->vport_use_count < 0); + + if (!pd->vport_use_count) + mana_uncfg_vport(mpc); + + mutex_unlock(&pd->vport_mutex); +} + +int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd, + u32 doorbell_id) +{ + struct gdma_dev *mdev = dev->gdma_dev; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + int err; + + mc = mdev->driver_data; + ndev = mc->ports[port]; + mpc = netdev_priv(ndev); + + mutex_lock(&pd->vport_mutex); + + pd->vport_use_count++; + if (pd->vport_use_count > 1) { + ibdev_dbg(&dev->ib_dev, + "Skip as this PD is already configured vport\n"); + mutex_unlock(&pd->vport_mutex); + return 0; + } + + err = mana_cfg_vport(mpc, pd->pdn, doorbell_id); + if (err) { + pd->vport_use_count--; + mutex_unlock(&pd->vport_mutex); + + ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err); + return err; + } + + mutex_unlock(&pd->vport_mutex); + + pd->tx_shortform_allowed = mpc->tx_shortform_allowed; + pd->tx_vp_offset = mpc->tx_vp_offset; + + ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n", + mpc->port_handle, pd->pdn, doorbell_id); + + return 0; +} + +int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct ib_device *ibdev = ibpd->device; + struct gdma_create_pd_resp resp = {}; + struct gdma_create_pd_req req = {}; + enum gdma_pd_flags flags = 0; + struct mana_ib_dev *dev; + struct gdma_dev *mdev; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + mdev = dev->gdma_dev; + + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req), + sizeof(resp)); + + req.flags = flags; + err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req, + sizeof(resp), &resp); + + if (err || resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, + "Failed to get pd_id err %d status %u\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + + return err; + } + + pd->pd_handle = resp.pd_handle; + pd->pdn = resp.pd_id; + ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n", + pd->pd_handle, pd->pdn); + + mutex_init(&pd->vport_mutex); + pd->vport_use_count = 0; + return 0; +} + +int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct ib_device *ibdev = ibpd->device; + struct gdma_destory_pd_resp resp = {}; + struct gdma_destroy_pd_req req = {}; + struct mana_ib_dev *dev; + struct gdma_dev *mdev; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + mdev = dev->gdma_dev; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req), + sizeof(resp)); + + req.pd_handle = pd->pd_handle; + err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req, + sizeof(resp), &resp); + + if (err || resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, + "Failed to destroy pd_handle 0x%llx err %d status %u", + pd->pd_handle, err, resp.hdr.status); + if (!err) + err = -EPROTO; + } + + return err; +} + +static int mana_gd_destroy_doorbell_page(struct gdma_context *gc, + int doorbell_page) +{ + struct gdma_destroy_resource_range_req req = {}; + struct gdma_resp_hdr resp = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE, + sizeof(req), sizeof(resp)); + + req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; + req.num_resources = 1; + req.allocated_resources = doorbell_page; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.status) { + dev_err(gc->dev, + "Failed to destroy doorbell page: ret %d, 0x%x\n", + err, resp.status); + return err ?: -EPROTO; + } + + return 0; +} + +static int mana_gd_allocate_doorbell_page(struct gdma_context *gc, + int *doorbell_page) +{ + struct gdma_allocate_resource_range_req req = {}; + struct gdma_allocate_resource_range_resp resp = {}; + int err; + + mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE, + sizeof(req), sizeof(resp)); + + req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE; + req.num_resources = 1; + req.alignment = 1; + + /* Have GDMA start searching from 0 */ + req.allocated_resources = 0; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, + "Failed to allocate doorbell page: ret %d, 0x%x\n", + err, resp.hdr.status); + return err ?: -EPROTO; + } + + *doorbell_page = resp.allocated_resources; + + return 0; +} + +int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext, + struct ib_udata *udata) +{ + struct mana_ib_ucontext *ucontext = + container_of(ibcontext, struct mana_ib_ucontext, ibucontext); + struct ib_device *ibdev = ibcontext->device; + struct mana_ib_dev *mdev; + struct gdma_context *gc; + struct gdma_dev *dev; + int doorbell_page; + int ret; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + dev = mdev->gdma_dev; + gc = dev->gdma_context; + + /* Allocate a doorbell page index */ + ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page); + if (ret) { + ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret); + return ret; + } + + ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page); + + ucontext->doorbell = doorbell_page; + + return 0; +} + +void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext) +{ + struct mana_ib_ucontext *mana_ucontext = + container_of(ibcontext, struct mana_ib_ucontext, ibucontext); + struct ib_device *ibdev = ibcontext->device; + struct mana_ib_dev *mdev; + struct gdma_context *gc; + int ret; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; + + ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell); + if (ret) + ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret); +} + +static int +mana_ib_gd_first_dma_region(struct mana_ib_dev *dev, + struct gdma_context *gc, + struct gdma_create_dma_region_req *create_req, + size_t num_pages, mana_handle_t *gdma_region) +{ + struct gdma_create_dma_region_resp create_resp = {}; + unsigned int create_req_msg_size; + int err; + + create_req_msg_size = + struct_size(create_req, page_addr_list, num_pages); + create_req->page_addr_list_len = num_pages; + + err = mana_gd_send_request(gc, create_req_msg_size, create_req, + sizeof(create_resp), &create_resp); + if (err || create_resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, + "Failed to create DMA region: %d, 0x%x\n", + err, create_resp.hdr.status); + if (!err) + err = -EPROTO; + + return err; + } + + *gdma_region = create_resp.dma_region_handle; + ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n", + *gdma_region); + + return 0; +} + +static int +mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc, + struct gdma_dma_region_add_pages_req *add_req, + unsigned int num_pages, u32 expected_status) +{ + unsigned int add_req_msg_size = + struct_size(add_req, page_addr_list, num_pages); + struct gdma_general_resp add_resp = {}; + int err; + + mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES, + add_req_msg_size, sizeof(add_resp)); + add_req->page_addr_list_len = num_pages; + + err = mana_gd_send_request(gc, add_req_msg_size, add_req, + sizeof(add_resp), &add_resp); + if (err || add_resp.hdr.status != expected_status) { + ibdev_dbg(&dev->ib_dev, + "Failed to create DMA region: %d, 0x%x\n", + err, add_resp.hdr.status); + + if (!err) + err = -EPROTO; + + return err; + } + + return 0; +} + +int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, + mana_handle_t *gdma_region) +{ + struct gdma_dma_region_add_pages_req *add_req = NULL; + size_t num_pages_processed = 0, num_pages_to_handle; + struct gdma_create_dma_region_req *create_req; + unsigned int create_req_msg_size; + struct hw_channel_context *hwc; + struct ib_block_iter biter; + size_t max_pgs_add_cmd = 0; + size_t max_pgs_create_cmd; + struct gdma_context *gc; + size_t num_pages_total; + struct gdma_dev *mdev; + unsigned long page_sz; + unsigned int tail = 0; + u64 *page_addr_list; + void *request_buf; + int err; + + mdev = dev->gdma_dev; + gc = mdev->gdma_context; + hwc = gc->hwc.driver_data; + + /* Hardware requires dma region to align to chosen page size */ + page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0); + if (!page_sz) { + ibdev_dbg(&dev->ib_dev, "failed to find page size.\n"); + return -ENOMEM; + } + num_pages_total = ib_umem_num_dma_blocks(umem, page_sz); + + max_pgs_create_cmd = + (hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64); + num_pages_to_handle = + min_t(size_t, num_pages_total, max_pgs_create_cmd); + create_req_msg_size = + struct_size(create_req, page_addr_list, num_pages_to_handle); + + request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL); + if (!request_buf) + return -ENOMEM; + + create_req = request_buf; + mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION, + create_req_msg_size, + sizeof(struct gdma_create_dma_region_resp)); + + create_req->length = umem->length; + create_req->offset_in_page = umem->address & (page_sz - 1); + create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT; + create_req->page_count = num_pages_total; + + ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n", + umem->length, num_pages_total); + + ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n", + page_sz, create_req->offset_in_page); + + ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u", + num_pages_to_handle, create_req->gdma_page_type); + + page_addr_list = create_req->page_addr_list; + rdma_umem_for_each_dma_block(umem, &biter, page_sz) { + page_addr_list[tail++] = rdma_block_iter_dma_address(&biter); + if (tail < num_pages_to_handle) + continue; + + if (!num_pages_processed) { + /* First create message */ + err = mana_ib_gd_first_dma_region(dev, gc, create_req, + tail, gdma_region); + if (err) + goto out; + + max_pgs_add_cmd = (hwc->max_req_msg_size - + sizeof(*add_req)) / sizeof(u64); + + add_req = request_buf; + add_req->dma_region_handle = *gdma_region; + add_req->reserved3 = 0; + page_addr_list = add_req->page_addr_list; + } else { + /* Subsequent create messages */ + u32 expected_s = 0; + + if (num_pages_processed + num_pages_to_handle < + num_pages_total) + expected_s = GDMA_STATUS_MORE_ENTRIES; + + err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail, + expected_s); + if (err) + break; + } + + num_pages_processed += tail; + tail = 0; + + /* The remaining pages to create */ + num_pages_to_handle = + min_t(size_t, + num_pages_total - num_pages_processed, + max_pgs_add_cmd); + } + + if (err) + mana_ib_gd_destroy_dma_region(dev, *gdma_region); + +out: + kfree(request_buf); + return err; +} + +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region) +{ + struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_context *gc; + + gc = mdev->gdma_context; + ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region); + + return mana_gd_destroy_dma_region(gc, gdma_region); +} + +int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma) +{ + struct mana_ib_ucontext *mana_ucontext = + container_of(ibcontext, struct mana_ib_ucontext, ibucontext); + struct ib_device *ibdev = ibcontext->device; + struct mana_ib_dev *mdev; + struct gdma_context *gc; + phys_addr_t pfn; + pgprot_t prot; + int ret; + + mdev = container_of(ibdev, struct mana_ib_dev, ib_dev); + gc = mdev->gdma_dev->gdma_context; + + if (vma->vm_pgoff != 0) { + ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff); + return -EINVAL; + } + + /* Map to the page indexed by ucontext->doorbell */ + pfn = (gc->phys_db_page_base + + gc->db_page_size * mana_ucontext->doorbell) >> + PAGE_SHIFT; + prot = pgprot_writecombine(vma->vm_page_prot); + + ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot, + NULL); + if (ret) + ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret); + else + ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n", + pfn, gc->db_page_size, ret); + + return ret; +} + +int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, + struct ib_port_immutable *immutable) +{ + /* + * This version only support RAW_PACKET + * other values need to be filled for other types + */ + immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET; + + return 0; +} + +int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw) +{ + props->max_qp = MANA_MAX_NUM_QUEUES; + props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE; + + /* + * max_cqe could be potentially much bigger. + * As this version of driver only support RAW QP, set it to the same + * value as max_qp_wr + */ + props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE; + + props->max_mr_size = MANA_IB_MAX_MR_SIZE; + props->max_mr = MANA_IB_MAX_MR; + props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES; + props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES; + + return 0; +} + +int mana_ib_query_port(struct ib_device *ibdev, u32 port, + struct ib_port_attr *props) +{ + /* This version doesn't return port properties */ + return 0; +} + +int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, + union ib_gid *gid) +{ + /* This version doesn't return GID properties */ + return 0; +} + +void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext) +{ +} diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h new file mode 100644 index 0000000000000000000000000000000000000000..502cc8672eefa2cf7711841fca93fa94d99d1df8 --- /dev/null +++ b/drivers/infiniband/hw/mana/mana_ib.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 Microsoft Corporation. All rights reserved. + */ + +#ifndef _MANA_IB_H_ +#define _MANA_IB_H_ + +#include +#include +#include +#include +#include + +#include + +#define PAGE_SZ_BM \ + (SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K | \ + SZ_512K | SZ_1M | SZ_2M) + +/* MANA doesn't have any limit for MR size */ +#define MANA_IB_MAX_MR_SIZE U64_MAX + +/* + * The hardware limit of number of MRs is greater than maximum number of MRs + * that can possibly represent in 24 bits + */ +#define MANA_IB_MAX_MR 0xFFFFFFu + +struct mana_ib_dev { + struct ib_device ib_dev; + struct gdma_dev *gdma_dev; +}; + +struct mana_ib_wq { + struct ib_wq ibwq; + struct ib_umem *umem; + int wqe; + u32 wq_buf_size; + u64 gdma_region; + u64 id; + mana_handle_t rx_object; +}; + +struct mana_ib_pd { + struct ib_pd ibpd; + u32 pdn; + mana_handle_t pd_handle; + + /* Mutex for sharing access to vport_use_count */ + struct mutex vport_mutex; + int vport_use_count; + + bool tx_shortform_allowed; + u32 tx_vp_offset; +}; + +struct mana_ib_mr { + struct ib_mr ibmr; + struct ib_umem *umem; + mana_handle_t mr_handle; +}; + +struct mana_ib_cq { + struct ib_cq ibcq; + struct ib_umem *umem; + int cqe; + u64 gdma_region; + u64 id; +}; + +struct mana_ib_qp { + struct ib_qp ibqp; + + /* Work queue info */ + struct ib_umem *sq_umem; + int sqe; + u64 sq_gdma_region; + u64 sq_id; + mana_handle_t tx_object; + + /* The port on the IB device, starting with 1 */ + u32 port; +}; + +struct mana_ib_ucontext { + struct ib_ucontext ibucontext; + u32 doorbell; +}; + +struct mana_ib_rwq_ind_table { + struct ib_rwq_ind_table ib_ind_table; +}; + +int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem, + mana_handle_t *gdma_region); + +int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, + mana_handle_t gdma_region); + +struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata); + +int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata); + +int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata); + +int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata); + +int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl); + +struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags); + +struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata); + +int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata); + +int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); + +int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata); + +int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata); + +int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id, + struct mana_ib_pd *pd, u32 doorbell_id); +void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd, + u32 port); + +int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, + struct ib_udata *udata); + +int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata); + +int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); +int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata); + +int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext, + struct ib_udata *udata); +void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); + +int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma); + +int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num, + struct ib_port_immutable *immutable); +int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props, + struct ib_udata *uhw); +int mana_ib_query_port(struct ib_device *ibdev, u32 port, + struct ib_port_attr *props); +int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index, + union ib_gid *gid); + +void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext); + +#endif diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c new file mode 100644 index 0000000000000000000000000000000000000000..351207c60eb65da7505aeac271a310a79b346fc6 --- /dev/null +++ b/drivers/infiniband/hw/mana/mr.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +#define VALID_MR_FLAGS \ + (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ) + +static enum gdma_mr_access_flags +mana_ib_verbs_to_gdma_access_flags(int access_flags) +{ + enum gdma_mr_access_flags flags = GDMA_ACCESS_FLAG_LOCAL_READ; + + if (access_flags & IB_ACCESS_LOCAL_WRITE) + flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_WRITE) + flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE; + + if (access_flags & IB_ACCESS_REMOTE_READ) + flags |= GDMA_ACCESS_FLAG_REMOTE_READ; + + return flags; +} + +static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr, + struct gdma_create_mr_params *mr_params) +{ + struct gdma_create_mr_response resp = {}; + struct gdma_create_mr_request req = {}; + struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_context *gc; + int err; + + gc = mdev->gdma_context; + + mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req), + sizeof(resp)); + req.pd_handle = mr_params->pd_handle; + req.mr_type = mr_params->mr_type; + + switch (mr_params->mr_type) { + case GDMA_MR_TYPE_GVA: + req.gva.dma_region_handle = mr_params->gva.dma_region_handle; + req.gva.virtual_address = mr_params->gva.virtual_address; + req.gva.access_flags = mr_params->gva.access_flags; + break; + + default: + ibdev_dbg(&dev->ib_dev, + "invalid param (GDMA_MR_TYPE) passed, type %d\n", + req.mr_type); + return -EINVAL; + } + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + + if (err || resp.hdr.status) { + ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + + return err; + } + + mr->ibmr.lkey = resp.lkey; + mr->ibmr.rkey = resp.rkey; + mr->mr_handle = resp.mr_handle; + + return 0; +} + +static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle) +{ + struct gdma_destroy_mr_response resp = {}; + struct gdma_destroy_mr_request req = {}; + struct gdma_dev *mdev = dev->gdma_dev; + struct gdma_context *gc; + int err; + + gc = mdev->gdma_context; + + mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req), + sizeof(resp)); + + req.mr_handle = mr_handle; + + err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp); + if (err || resp.hdr.status) { + dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err, + resp.hdr.status); + if (!err) + err = -EPROTO; + return err; + } + + return 0; +} + +struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, + u64 iova, int access_flags, + struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct gdma_create_mr_params mr_params = {}; + struct ib_device *ibdev = ibpd->device; + struct mana_ib_dev *dev; + struct mana_ib_mr *mr; + u64 dma_region_handle; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + ibdev_dbg(ibdev, + "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x", + start, iova, length, access_flags); + + if (access_flags & ~VALID_MR_FLAGS) + return ERR_PTR(-EINVAL); + + mr = kzalloc(sizeof(*mr), GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + mr->umem = ib_umem_get(ibdev, start, length, access_flags); + if (IS_ERR(mr->umem)) { + err = PTR_ERR(mr->umem); + ibdev_dbg(ibdev, + "Failed to get umem for register user-mr, %d\n", err); + goto err_free; + } + + err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle); + if (err) { + ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n", + err); + goto err_umem; + } + + ibdev_dbg(ibdev, + "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err, + dma_region_handle); + + mr_params.pd_handle = pd->pd_handle; + mr_params.mr_type = GDMA_MR_TYPE_GVA; + mr_params.gva.dma_region_handle = dma_region_handle; + mr_params.gva.virtual_address = iova; + mr_params.gva.access_flags = + mana_ib_verbs_to_gdma_access_flags(access_flags); + + err = mana_ib_gd_create_mr(dev, mr, &mr_params); + if (err) + goto err_dma_region; + + /* + * There is no need to keep track of dma_region_handle after MR is + * successfully created. The dma_region_handle is tracked in the PF + * as part of the lifecycle of this MR. + */ + + return &mr->ibmr; + +err_dma_region: + mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context, + dma_region_handle); + +err_umem: + ib_umem_release(mr->umem); + +err_free: + kfree(mr); + return ERR_PTR(err); +} + +int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) +{ + struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr); + struct ib_device *ibdev = ibmr->device; + struct mana_ib_dev *dev; + int err; + + dev = container_of(ibdev, struct mana_ib_dev, ib_dev); + + err = mana_ib_gd_destroy_mr(dev, mr->mr_handle); + if (err) + return err; + + if (mr->umem) + ib_umem_release(mr->umem); + + kfree(mr); + + return 0; +} diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c new file mode 100644 index 0000000000000000000000000000000000000000..ea15ec77e3212a16db49f1a1476a07f1e2a7edd2 --- /dev/null +++ b/drivers/infiniband/hw/mana/qp.c @@ -0,0 +1,506 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev, + struct net_device *ndev, + mana_handle_t default_rxobj, + mana_handle_t ind_table[], + u32 log_ind_tbl_size, u32 rx_hash_key_len, + u8 *rx_hash_key) +{ + struct mana_port_context *mpc = netdev_priv(ndev); + struct mana_cfg_rx_steer_req *req = NULL; + struct mana_cfg_rx_steer_resp resp = {}; + mana_handle_t *req_indir_tab; + struct gdma_context *gc; + struct gdma_dev *mdev; + u32 req_buf_size; + int i, err; + + mdev = dev->gdma_dev; + gc = mdev->gdma_context; + + req_buf_size = + sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE; + req = kzalloc(req_buf_size, GFP_KERNEL); + if (!req) + return -ENOMEM; + + mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size, + sizeof(resp)); + + req->vport = mpc->port_handle; + req->rx_enable = 1; + req->update_default_rxobj = 1; + req->default_rxobj = default_rxobj; + req->hdr.dev_id = mdev->dev_id; + + /* If there are more than 1 entries in indirection table, enable RSS */ + if (log_ind_tbl_size) + req->rss_enable = true; + + req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE; + req->indir_tab_offset = sizeof(*req); + req->update_indir_tab = true; + + req_indir_tab = (mana_handle_t *)(req + 1); + /* The ind table passed to the hardware must have + * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb + * ind_table to MANA_INDIRECT_TABLE_SIZE if required + */ + ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size); + for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) { + req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)]; + ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i, + req_indir_tab[i]); + } + + req->update_hashkey = true; + if (rx_hash_key_len) + memcpy(req->hashkey, rx_hash_key, rx_hash_key_len); + else + netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE); + + ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n", + req->vport, default_rxobj); + + err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp); + if (err) { + netdev_err(ndev, "Failed to configure vPort RX: %d\n", err); + goto out; + } + + if (resp.hdr.status) { + netdev_err(ndev, "vPort RX configuration failed: 0x%x\n", + resp.hdr.status); + err = -EPROTO; + goto out; + } + + netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n", + mpc->port_handle, log_ind_tbl_size); + +out: + kfree(req); + return err; +} + +static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + struct mana_ib_dev *mdev = + container_of(pd->device, struct mana_ib_dev, ib_dev); + struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl; + struct mana_ib_create_qp_rss_resp resp = {}; + struct mana_ib_create_qp_rss ucmd = {}; + struct gdma_dev *gd = mdev->gdma_dev; + mana_handle_t *mana_ind_table; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct mana_ib_cq *cq; + struct mana_ib_wq *wq; + unsigned int ind_tbl_size; + struct ib_cq *ibcq; + struct ib_wq *ibwq; + int i = 0; + u32 port; + int ret; + + mc = gd->driver_data; + + if (!udata || udata->inlen < sizeof(ucmd)) + return -EINVAL; + + ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (ret) { + ibdev_dbg(&mdev->ib_dev, + "Failed copy from udata for create rss-qp, err %d\n", + ret); + return ret; + } + + if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_recv_wr %d exceeding limit\n", + attr->cap.max_recv_wr); + return -EINVAL; + } + + if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_recv_sge %d exceeding limit\n", + attr->cap.max_recv_sge); + return -EINVAL; + } + + ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size; + if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) { + ibdev_dbg(&mdev->ib_dev, + "Indirect table size %d exceeding limit\n", + ind_tbl_size); + return -EINVAL; + } + + if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) { + ibdev_dbg(&mdev->ib_dev, + "RX Hash function is not supported, %d\n", + ucmd.rx_hash_function); + return -EINVAL; + } + + /* IB ports start with 1, MANA start with 0 */ + port = ucmd.port; + if (port < 1 || port > mc->num_ports) { + ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n", + port); + return -EINVAL; + } + ndev = mc->ports[port - 1]; + mpc = netdev_priv(ndev); + + ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n", + ucmd.rx_hash_function, port); + + mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t), + GFP_KERNEL); + if (!mana_ind_table) { + ret = -ENOMEM; + goto fail; + } + + qp->port = port; + + for (i = 0; i < ind_tbl_size; i++) { + struct mana_obj_spec wq_spec = {}; + struct mana_obj_spec cq_spec = {}; + + ibwq = ind_tbl->ind_tbl[i]; + wq = container_of(ibwq, struct mana_ib_wq, ibwq); + + ibcq = ibwq->cq; + cq = container_of(ibcq, struct mana_ib_cq, ibcq); + + wq_spec.gdma_region = wq->gdma_region; + wq_spec.queue_size = wq->wq_buf_size; + + cq_spec.gdma_region = cq->gdma_region; + cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = GDMA_CQ_NO_EQ; + + ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ, + &wq_spec, &cq_spec, &wq->rx_object); + if (ret) + goto fail; + + /* The GDMA regions are now owned by the WQ object */ + wq->gdma_region = GDMA_INVALID_DMA_REGION; + cq->gdma_region = GDMA_INVALID_DMA_REGION; + + wq->id = wq_spec.queue_index; + cq->id = cq_spec.queue_index; + + ibdev_dbg(&mdev->ib_dev, + "ret %d rx_object 0x%llx wq id %llu cq id %llu\n", + ret, wq->rx_object, wq->id, cq->id); + + resp.entries[i].cqid = cq->id; + resp.entries[i].wqid = wq->id; + + mana_ind_table[i] = wq->rx_object; + } + resp.num_entries = i; + + ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object, + mana_ind_table, + ind_tbl->log_ind_tbl_size, + ucmd.rx_hash_key_len, + ucmd.rx_hash_key); + if (ret) + goto fail; + + ret = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (ret) { + ibdev_dbg(&mdev->ib_dev, + "Failed to copy to udata create rss-qp, %d\n", + ret); + goto fail; + } + + kfree(mana_ind_table); + + return 0; + +fail: + while (i-- > 0) { + ibwq = ind_tbl->ind_tbl[i]; + wq = container_of(ibwq, struct mana_ib_wq, ibwq); + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); + } + + kfree(mana_ind_table); + + return ret; +} + +static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd, + struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd); + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + struct mana_ib_dev *mdev = + container_of(ibpd->device, struct mana_ib_dev, ib_dev); + struct mana_ib_cq *send_cq = + container_of(attr->send_cq, struct mana_ib_cq, ibcq); + struct mana_ib_ucontext *mana_ucontext = + rdma_udata_to_drv_context(udata, struct mana_ib_ucontext, + ibucontext); + struct mana_ib_create_qp_resp resp = {}; + struct gdma_dev *gd = mdev->gdma_dev; + struct mana_ib_create_qp ucmd = {}; + struct mana_obj_spec wq_spec = {}; + struct mana_obj_spec cq_spec = {}; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct ib_umem *umem; + int err; + u32 port; + + mc = gd->driver_data; + + if (!mana_ucontext || udata->inlen < sizeof(ucmd)) + return -EINVAL; + + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to copy from udata create qp-raw, %d\n", err); + return err; + } + + /* IB ports start with 1, MANA Ethernet ports start with 0 */ + port = ucmd.port; + if (ucmd.port > mc->num_ports) + return -EINVAL; + + if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_send_wr %d exceeding limit\n", + attr->cap.max_send_wr); + return -EINVAL; + } + + if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) { + ibdev_dbg(&mdev->ib_dev, + "Requested max_send_sge %d exceeding limit\n", + attr->cap.max_send_sge); + return -EINVAL; + } + + ndev = mc->ports[port - 1]; + mpc = netdev_priv(ndev); + ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc); + + err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell); + if (err) + return -ENODEV; + + qp->port = port; + + ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n", + ucmd.sq_buf_addr, ucmd.port); + + umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + ibdev_dbg(&mdev->ib_dev, + "Failed to get umem for create qp-raw, err %d\n", + err); + goto err_free_vport; + } + qp->sq_umem = umem; + + err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem, + &qp->sq_gdma_region); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to create dma region for create qp-raw, %d\n", + err); + goto err_release_umem; + } + + ibdev_dbg(&mdev->ib_dev, + "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", + err, qp->sq_gdma_region); + + /* Create a WQ on the same port handle used by the Ethernet */ + wq_spec.gdma_region = qp->sq_gdma_region; + wq_spec.queue_size = ucmd.sq_buf_size; + + cq_spec.gdma_region = send_cq->gdma_region; + cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE; + cq_spec.modr_ctx_id = 0; + cq_spec.attached_eq = GDMA_CQ_NO_EQ; + + err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec, + &cq_spec, &qp->tx_object); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to create wq for create raw-qp, err %d\n", + err); + goto err_destroy_dma_region; + } + + /* The GDMA regions are now owned by the WQ object */ + qp->sq_gdma_region = GDMA_INVALID_DMA_REGION; + send_cq->gdma_region = GDMA_INVALID_DMA_REGION; + + qp->sq_id = wq_spec.queue_index; + send_cq->id = cq_spec.queue_index; + + ibdev_dbg(&mdev->ib_dev, + "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err, + qp->tx_object, qp->sq_id, send_cq->id); + + resp.sqid = qp->sq_id; + resp.cqid = send_cq->id; + resp.tx_vp_offset = pd->tx_vp_offset; + + err = ib_copy_to_udata(udata, &resp, sizeof(resp)); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed copy udata for create qp-raw, %d\n", + err); + goto err_destroy_wq_obj; + } + + return 0; + +err_destroy_wq_obj: + mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + +err_destroy_dma_region: + mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); + +err_release_umem: + ib_umem_release(umem); + +err_free_vport: + mana_ib_uncfg_vport(mdev, pd, port - 1); + + return err; +} + +int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr, + struct ib_udata *udata) +{ + switch (attr->qp_type) { + case IB_QPT_RAW_PACKET: + /* When rwq_ind_tbl is used, it's for creating WQs for RSS */ + if (attr->rwq_ind_tbl) + return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr, + udata); + + return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata); + default: + /* Creating QP other than IB_QPT_RAW_PACKET is not supported */ + ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n", + attr->qp_type); + } + + return -EINVAL; +} + +int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, + int attr_mask, struct ib_udata *udata) +{ + /* modify_qp is not supported by this version of the driver */ + return -EOPNOTSUPP; +} + +static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp, + struct ib_rwq_ind_table *ind_tbl, + struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct gdma_dev *gd = mdev->gdma_dev; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct mana_ib_wq *wq; + struct ib_wq *ibwq; + int i; + + mc = gd->driver_data; + ndev = mc->ports[qp->port - 1]; + mpc = netdev_priv(ndev); + + for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) { + ibwq = ind_tbl->ind_tbl[i]; + wq = container_of(ibwq, struct mana_ib_wq, ibwq); + ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n", + wq->rx_object); + mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object); + } + + return 0; +} + +static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev); + struct gdma_dev *gd = mdev->gdma_dev; + struct ib_pd *ibpd = qp->ibqp.pd; + struct mana_port_context *mpc; + struct mana_context *mc; + struct net_device *ndev; + struct mana_ib_pd *pd; + + mc = gd->driver_data; + ndev = mc->ports[qp->port - 1]; + mpc = netdev_priv(ndev); + pd = container_of(ibpd, struct mana_ib_pd, ibpd); + + mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object); + + if (qp->sq_umem) { + mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region); + ib_umem_release(qp->sq_umem); + } + + mana_ib_uncfg_vport(mdev, pd, qp->port - 1); + + return 0; +} + +int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) +{ + struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp); + + switch (ibqp->qp_type) { + case IB_QPT_RAW_PACKET: + if (ibqp->rwq_ind_tbl) + return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl, + udata); + + return mana_ib_destroy_qp_raw(qp, udata); + + default: + ibdev_dbg(ibqp->device, "Unexpected QP type %u\n", + ibqp->qp_type); + } + + return -ENOENT; +} diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c new file mode 100644 index 0000000000000000000000000000000000000000..372d361510e0ca0ea9060f5b53d1a517a72cb178 --- /dev/null +++ b/drivers/infiniband/hw/mana/wq.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2022, Microsoft Corporation. All rights reserved. + */ + +#include "mana_ib.h" + +struct ib_wq *mana_ib_create_wq(struct ib_pd *pd, + struct ib_wq_init_attr *init_attr, + struct ib_udata *udata) +{ + struct mana_ib_dev *mdev = + container_of(pd->device, struct mana_ib_dev, ib_dev); + struct mana_ib_create_wq ucmd = {}; + struct mana_ib_wq *wq; + struct ib_umem *umem; + int err; + + if (udata->inlen < sizeof(ucmd)) + return ERR_PTR(-EINVAL); + + err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen)); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to copy from udata for create wq, %d\n", err); + return ERR_PTR(err); + } + + wq = kzalloc(sizeof(*wq), GFP_KERNEL); + if (!wq) + return ERR_PTR(-ENOMEM); + + ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr); + + umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(umem)) { + err = PTR_ERR(umem); + ibdev_dbg(&mdev->ib_dev, + "Failed to get umem for create wq, err %d\n", err); + goto err_free_wq; + } + + wq->umem = umem; + wq->wqe = init_attr->max_wr; + wq->wq_buf_size = ucmd.wq_buf_size; + wq->rx_object = INVALID_MANA_HANDLE; + + err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region); + if (err) { + ibdev_dbg(&mdev->ib_dev, + "Failed to create dma region for create wq, %d\n", + err); + goto err_release_umem; + } + + ibdev_dbg(&mdev->ib_dev, + "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n", + err, wq->gdma_region); + + /* WQ ID is returned at wq_create time, doesn't know the value yet */ + + return &wq->ibwq; + +err_release_umem: + ib_umem_release(umem); + +err_free_wq: + kfree(wq); + + return ERR_PTR(err); +} + +int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr, + u32 wq_attr_mask, struct ib_udata *udata) +{ + /* modify_wq is not supported by this version of the driver */ + return -EOPNOTSUPP; +} + +int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata) +{ + struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq); + struct ib_device *ib_dev = ibwq->device; + struct mana_ib_dev *mdev; + + mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev); + + mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region); + ib_umem_release(wq->umem); + + kfree(wq); + + return 0; +} + +int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table, + struct ib_rwq_ind_table_init_attr *init_attr, + struct ib_udata *udata) +{ + /* + * There is no additional data in ind_table to be maintained by this + * driver, do nothing + */ + return 0; +} + +int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl) +{ + /* + * There is no additional data in ind_table to be maintained by this + * driver, do nothing + */ + return 0; +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index ba47874f90d381695f06d732bc1e520400c7c647..dceebcd885bbd05c228e79408565a7b466f56e02 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -144,8 +144,7 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device, } } } - if (dev) - dev_hold(dev); + dev_hold(dev); rcu_read_unlock(); return dev; @@ -1307,8 +1306,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, spin_lock_bh(&mdev->iboe.lock); ndev = mdev->iboe.netdevs[mqp->port - 1]; - if (ndev) - dev_hold(ndev); + dev_hold(ndev); spin_unlock_bh(&mdev->iboe.lock); if (ndev) { @@ -1955,11 +1953,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid) if (ge) { spin_lock_bh(&mdev->iboe.lock); ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL; - if (ndev) - dev_hold(ndev); + dev_hold(ndev); spin_unlock_bh(&mdev->iboe.lock); - if (ndev) - dev_put(ndev); + dev_put(ndev); list_del(&ge->list); kfree(ge); } else diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index be189e0525de6c33549f68b727c5c95317455d80..efc9e4a6df04a2b623440c4b7d00d1da08d230d2 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe, wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE; } -static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe) +static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, + struct ib_wc *wc, const char *level) { - mlx5_ib_warn(dev, "dump error cqe\n"); - mlx5_dump_err_cqe(dev->mdev, cqe); + mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status, + ib_wc_status_msg(wc->status)); + print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1, + cqe, sizeof(*cqe), false); } static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe, struct ib_wc *wc) { - int dump = 1; + const char *dump = KERN_WARNING; switch (cqe->syndrome) { case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR: @@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->status = IB_WC_LOC_QP_OP_ERR; break; case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_LOC_PROT_ERR; break; case MLX5_CQE_SYNDROME_WR_FLUSH_ERR: - dump = 0; + dump = NULL; wc->status = IB_WC_WR_FLUSH_ERR; break; case MLX5_CQE_SYNDROME_MW_BIND_ERR: @@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, wc->status = IB_WC_REM_INV_REQ_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_ACCESS_ERR; break; case MLX5_CQE_SYNDROME_REMOTE_OP_ERR: + dump = KERN_DEBUG; wc->status = IB_WC_REM_OP_ERR; break; case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR: + dump = NULL; wc->status = IB_WC_RNR_RETRY_EXC_ERR; - dump = 0; break; case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR: wc->status = IB_WC_REM_ABORT_ERR; @@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev, } wc->vendor_err = cqe->vendor_err_synd; - if (dump) { - mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status, - ib_wc_status_msg(wc->status)); - dump_cqe(dev, cqe); - } + if (dump) + dump_cqe(dev, cqe, wc, dump); } static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64, diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 490ec308e3098f094c2cbb85ab42c30ba0b142fb..3008632a6c2064a81c5c8d5ded5e2f98385319de 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -127,7 +127,6 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask) } #define LAST_ETH_FIELD vlan_tag -#define LAST_IB_FIELD sl #define LAST_IPV4_FIELD tos #define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 4a7f7064bd0eb5221f49113cd8e52ed1b18af734..8b91babdd4c084f5328cddd6c2d26ff081c0b9a5 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -38,6 +38,10 @@ dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \ __LINE__, current->pid, ##arg) +#define mlx5_ib_log(lvl, _dev, format, arg...) \ + dev_printk(lvl, &(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, ##arg) + #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 410cc5fd25239db3a784360720048080ecc7d88b..053fe946e45aef44d62a8fe137a04beb258493cf 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1929,10 +1929,8 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); in = kzalloc(inlen, GFP_KERNEL); - if (!in) { - err = -ENOMEM; - goto free; - } + if (!in) + return -ENOMEM; mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index ba0c3e4c07d85cda8c517afbb9304b262da26989..ecdfeff3d44f364fc728b018fdb0b1b3da6d7984 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -479,7 +479,7 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle) /* The CQ's CNQ notification counter is checked before * destroying the CQ in a busy-wait loop that waits for all of * the CQ's CNQ interrupts to be processed. It is increased - * here, only after the completion handler, to ensure that the + * here, only after the completion handler, to ensure that * the handler is not running when the CQ is destroyed. */ cq->cnq_notif++; diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c index 07386117f21ad8ffb822471eaa90e4fe6d35b37a..277769fa97452c5dd5760d378a54126a0d3441c1 100644 --- a/drivers/infiniband/hw/qib/qib_iba6120.c +++ b/drivers/infiniband/hw/qib/qib_iba6120.c @@ -799,12 +799,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg, hwerrs &= ~TXE_PIO_PARITY; } - if (!hwerrs) { - static u32 freeze_cnt; - - freeze_cnt++; + if (!hwerrs) qib_6120_clear_freeze(dd); - } else + else isfatal = 1; } diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c index 6a8148851f21d32fbaf9b78a9b15fa0cc4694595..1325110237cd9ae4cea2c752dc6999dfdd3f1cbe 100644 --- a/drivers/infiniband/hw/qib/qib_tx.c +++ b/drivers/infiniband/hw/qib/qib_tx.c @@ -82,7 +82,6 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd) struct qib_devdata *dd = rcd->dd; unsigned i; unsigned last; - unsigned n = 0; last = rcd->pio_base + rcd->piocnt; /* @@ -102,10 +101,8 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd) } spin_lock_irq(&dd->pioavail_lock); for (i = rcd->pio_base; i < last; i++) { - if (__test_and_clear_bit(i, dd->pio_need_disarm)) { - n++; + if (__test_and_clear_bit(i, dd->pio_need_disarm)) dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i)); - } } spin_unlock_irq(&dd->pioavail_lock); return 0; diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c index bf2f30d67949dc386404a632c1ec28834d7269ff..9fe03d6ffac1a7890a34a2827d485516ef29637b 100644 --- a/drivers/infiniband/hw/qib/qib_user_sdma.c +++ b/drivers/infiniband/hw/qib/qib_user_sdma.c @@ -851,7 +851,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd, } /* - * This assignment is a bit strange. it's because the + * This assignment is a bit strange. it's because * the pbc counts the number of 32 bit words in the full * packet _except_ the first word of the pbc itself... */ diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c index 51daac5c4feb75b847bb06f8a32818ebd941ac73..136c2efe34660a4fe7eb432c2b46e28aa3de6e4a 100644 --- a/drivers/infiniband/sw/rxe/rxe.c +++ b/drivers/infiniband/sw/rxe/rxe.c @@ -187,14 +187,14 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev) exists = rxe_get_dev_from_net(ndev); if (exists) { ib_device_put(&exists->ib_dev); - pr_err("already configured on %s\n", ndev->name); + rxe_dbg(exists, "already configured on %s\n", ndev->name); err = -EEXIST; goto err; } err = rxe_net_add(ibdev_name, ndev); if (err) { - pr_err("failed to add %s\n", ndev->name); + rxe_dbg(exists, "failed to add %s\n", ndev->name); goto err; } err: diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h index 30fbdf3bc76a34dc3cfa2e21279ebb88226adbee..ab334900fcc3d7fc9bda4f7cd05fddbbc2ecfb82 100644 --- a/drivers/infiniband/sw/rxe/rxe.h +++ b/drivers/infiniband/sw/rxe/rxe.h @@ -38,6 +38,25 @@ #define RXE_ROCE_V2_SPORT (0xc000) +#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev, \ + "%s: " fmt, __func__, ##__VA_ARGS__) +#define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device, \ + "uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_pd(pd, fmt, ...) ibdev_dbg((pd)->ibpd.device, \ + "pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_ah(ah, fmt, ...) ibdev_dbg((ah)->ibah.device, \ + "ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_srq(srq, fmt, ...) ibdev_dbg((srq)->ibsrq.device, \ + "srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_qp(qp, fmt, ...) ibdev_dbg((qp)->ibqp.device, \ + "qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_cq(cq, fmt, ...) ibdev_dbg((cq)->ibcq.device, \ + "cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_mr(mr, fmt, ...) ibdev_dbg((mr)->ibmr.device, \ + "mr#%d %s: " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__) +#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device, \ + "mw#%d %s: " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__) + void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu); int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name); diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c index 3b05314ca739ec91c070829ab6123bd01a7ba986..889d7adbd45504af87543b41c545aa4e6423b92d 100644 --- a/drivers/infiniband/sw/rxe/rxe_av.c +++ b/drivers/infiniband/sw/rxe/rxe_av.c @@ -14,26 +14,45 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av) memcpy(av->dmac, attr->roce.dmac, ETH_ALEN); } -int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) +static int chk_attr(void *obj, struct rdma_ah_attr *attr, bool obj_is_ah) { const struct ib_global_route *grh = rdma_ah_read_grh(attr); struct rxe_port *port; + struct rxe_dev *rxe; + struct rxe_qp *qp; + struct rxe_ah *ah; int type; + if (obj_is_ah) { + ah = obj; + rxe = to_rdev(ah->ibah.device); + } else { + qp = obj; + rxe = to_rdev(qp->ibqp.device); + } + port = &rxe->port; if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) { if (grh->sgid_index > port->attr.gid_tbl_len) { - pr_warn("invalid sgid index = %d\n", - grh->sgid_index); + if (obj_is_ah) + rxe_dbg_ah(ah, "invalid sgid index = %d\n", + grh->sgid_index); + else + rxe_dbg_qp(qp, "invalid sgid index = %d\n", + grh->sgid_index); return -EINVAL; } type = rdma_gid_attr_network_type(grh->sgid_attr); if (type < RDMA_NETWORK_IPV4 || type > RDMA_NETWORK_IPV6) { - pr_warn("invalid network type for rdma_rxe = %d\n", - type); + if (obj_is_ah) + rxe_dbg_ah(ah, "invalid network type for rdma_rxe = %d\n", + type); + else + rxe_dbg_qp(qp, "invalid network type for rdma_rxe = %d\n", + type); return -EINVAL; } } @@ -41,6 +60,16 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr) return 0; } +int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr) +{ + return chk_attr(qp, attr, false); +} + +int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr) +{ + return chk_attr(ah, attr, true); +} + void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr) { @@ -121,12 +150,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp) /* only new user provider or kernel client */ ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num); if (!ah) { - pr_warn("Unable to find AH matching ah_num\n"); + rxe_dbg_qp(pkt->qp, "Unable to find AH matching ah_num\n"); return NULL; } if (rxe_ah_pd(ah) != pkt->qp->pd) { - pr_warn("PDs don't match for AH and QP\n"); + rxe_dbg_qp(pkt->qp, "PDs don't match for AH and QP\n"); rxe_put(ah); return NULL; } diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c index fb0c008af78cc2db16808165d3548567405df035..20737fec392bf7efae659316c4afbbfaddfab998 100644 --- a/drivers/infiniband/sw/rxe/rxe_comp.c +++ b/drivers/infiniband/sw/rxe/rxe_comp.c @@ -104,6 +104,8 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode) case IB_WR_LOCAL_INV: return IB_WC_LOCAL_INV; case IB_WR_REG_MR: return IB_WC_REG_MR; case IB_WR_BIND_MW: return IB_WC_BIND_MW; + case IB_WR_ATOMIC_WRITE: return IB_WC_ATOMIC_WRITE; + case IB_WR_FLUSH: return IB_WC_FLUSH; default: return 0xff; @@ -114,11 +116,11 @@ void retransmit_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, retrans_timer); - pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index); + rxe_dbg_qp(qp, "retransmit timer fired\n"); if (qp->valid) { qp->comp.timeout = 1; - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); } } @@ -132,7 +134,10 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) if (must_sched != 0) rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED); - rxe_run_task(&qp->comp.task, must_sched); + if (must_sched) + rxe_sched_task(&qp->comp.task); + else + rxe_run_task(&qp->comp.task); } static inline enum comp_state get_wqe(struct rxe_qp *qp, @@ -200,6 +205,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp, */ if (pkt->psn == wqe->last_psn) return COMPST_COMP_ACK; + else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE && + (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST || + qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE)) + return COMPST_CHECK_ACK; else return COMPST_DONE; } else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) { @@ -228,6 +237,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST: case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: + /* Check NAK code to handle a remote error */ + if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE) + break; + if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE && pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) { /* read retries of partial data may restart from @@ -258,12 +271,16 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, if ((syn & AETH_TYPE_MASK) != AETH_ACK) return COMPST_ERROR; + if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE) + return COMPST_WRITE_SEND; + fallthrough; /* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH) */ case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE: if (wqe->wr.opcode != IB_WR_RDMA_READ && - wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) { + wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV && + wqe->wr.opcode != IB_WR_FLUSH) { wqe->status = IB_WC_FATAL_ERR; return COMPST_ERROR; } @@ -305,7 +322,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, qp->comp.psn = pkt->psn; if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } return COMPST_ERROR_RETRY; @@ -323,7 +340,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, return COMPST_ERROR; default: - pr_warn("unexpected nak %x\n", syn); + rxe_dbg_qp(qp, "unexpected nak %x\n", syn); wqe->status = IB_WC_REM_OP_ERR; return COMPST_ERROR; } @@ -334,7 +351,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp, break; default: - pr_warn("unexpected opcode\n"); + rxe_dbg_qp(qp, "unexpected opcode\n"); } return COMPST_ERROR; @@ -452,7 +469,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe) */ if (qp->req.wait_fence) { qp->req.wait_fence = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } @@ -466,7 +483,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp, if (qp->req.need_rd_atomic) { qp->comp.timeout_retry = 0; qp->req.need_rd_atomic = 0; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } } @@ -512,7 +529,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp, if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } } @@ -587,8 +604,7 @@ int rxe_completer(void *arg) state = COMPST_GET_ACK; while (1) { - pr_debug("qp#%d state = %s\n", qp_num(qp), - comp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]); switch (state) { case COMPST_GET_ACK: skb = skb_dequeue(&qp->resp_pkts); @@ -646,7 +662,7 @@ int rxe_completer(void *arg) if (qp->req.wait_psn) { qp->req.wait_psn = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } state = COMPST_DONE; @@ -714,7 +730,7 @@ int rxe_completer(void *arg) RXE_CNT_COMP_RETRY); qp->req.need_retry = 1; qp->comp.started_retry = 1; - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); } goto done; @@ -735,8 +751,7 @@ int rxe_completer(void *arg) * rnr timer has fired */ qp->req.wait_for_rnr_timer = 1; - pr_debug("qp#%d set rnr nak timer\n", - qp_num(qp)); + rxe_dbg_qp(qp, "set rnr nak timer\n"); mod_timer(&qp->rnr_nak_timer, jiffies + rnrnak_jiffies(aeth_syn(pkt) & ~AETH_TYPE_MASK)); diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c index b1a0ab3cd4bd1864a151cd5add6aee422ec3d13e..1df186534639aa507f100801fca835ed6ec59852 100644 --- a/drivers/infiniband/sw/rxe/rxe_cq.c +++ b/drivers/infiniband/sw/rxe/rxe_cq.c @@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, int count; if (cqe <= 0) { - pr_warn("cqe(%d) <= 0\n", cqe); + rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe); goto err1; } if (cqe > rxe->attr.max_cqe) { - pr_debug("cqe(%d) > max_cqe(%d)\n", + rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n", cqe, rxe->attr.max_cqe); goto err1; } @@ -27,7 +27,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq, if (cq) { count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT); if (cqe < count) { - pr_debug("cqe(%d) < current # elements in queue (%d)", + rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)", cqe, count); goto err1; } @@ -65,7 +65,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe, cq->queue = rxe_queue_init(rxe, &cqe, sizeof(struct rxe_cqe), type); if (!cq->queue) { - pr_warn("unable to create cq\n"); + rxe_dbg(rxe, "unable to create cq\n"); return -ENOMEM; } diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h index e432f9e37795ef703206d091b19bbf78cf1b5cc1..46f82b27fcd2f540ed32d7495f530164b2d2e452 100644 --- a/drivers/infiniband/sw/rxe/rxe_hdr.h +++ b/drivers/infiniband/sw/rxe/rxe_hdr.h @@ -607,6 +607,52 @@ static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len) rxe_opcode[pkt->opcode].offset[RXE_RETH], len); } +/****************************************************************************** + * FLUSH Extended Transport Header + ******************************************************************************/ + +struct rxe_feth { + __be32 bits; +}; + +#define FETH_PLT_MASK (0x0000000f) /* bits 3-0 */ +#define FETH_SEL_MASK (0x00000030) /* bits 5-4 */ +#define FETH_SEL_SHIFT (4U) + +static inline u32 __feth_plt(void *arg) +{ + struct rxe_feth *feth = arg; + + return be32_to_cpu(feth->bits) & FETH_PLT_MASK; +} + +static inline u32 __feth_sel(void *arg) +{ + struct rxe_feth *feth = arg; + + return (be32_to_cpu(feth->bits) & FETH_SEL_MASK) >> FETH_SEL_SHIFT; +} + +static inline u32 feth_plt(struct rxe_pkt_info *pkt) +{ + return __feth_plt(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); +} + +static inline u32 feth_sel(struct rxe_pkt_info *pkt) +{ + return __feth_sel(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); +} + +static inline void feth_init(struct rxe_pkt_info *pkt, u8 type, u8 level) +{ + struct rxe_feth *feth = (struct rxe_feth *) + (pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]); + u32 bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) | + (type & FETH_PLT_MASK); + + feth->bits = cpu_to_be32(bits); +} + /****************************************************************************** * Atomic Extended Transport Header ******************************************************************************/ @@ -742,7 +788,6 @@ enum aeth_syndrome { AETH_NAK_INVALID_REQ = 0x61, AETH_NAK_REM_ACC_ERR = 0x62, AETH_NAK_REM_OP_ERR = 0x63, - AETH_NAK_INV_RD_REQ = 0x64, }; static inline u8 __aeth_syn(void *arg) @@ -910,6 +955,7 @@ enum rxe_hdr_length { RXE_ATMETH_BYTES = sizeof(struct rxe_atmeth), RXE_IETH_BYTES = sizeof(struct rxe_ieth), RXE_RDETH_BYTES = sizeof(struct rxe_rdeth), + RXE_FETH_BYTES = sizeof(struct rxe_feth), }; static inline size_t header_size(struct rxe_pkt_info *pkt) diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c index 46bb07c5c4df2532736a724e818c70534c748450..71bc2c1895888f8fce3a53c8da38a698987cd7ae 100644 --- a/drivers/infiniband/sw/rxe/rxe_icrc.c +++ b/drivers/infiniband/sw/rxe/rxe_icrc.c @@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe) tfm = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(tfm)) { - pr_warn("failed to init crc32 algorithm err:%ld\n", + rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n", PTR_ERR(tfm)); return PTR_ERR(tfm); } @@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len) *(__be32 *)shash_desc_ctx(shash) = crc; err = crypto_shash_update(shash, next, len); if (unlikely(err)) { - pr_warn_ratelimited("failed crc calculation, err: %d\n", err); + rxe_dbg(rxe, "failed crc calculation, err: %d\n", err); return (__force __be32)crc32_le((__force u32)crc, next, len); } diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index c2a5c8814a48bbefac18316fe81014a9cdad649f..948ce4902b10fe38bb2aedd88849f08f8d6b5159 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -9,16 +9,12 @@ /* rxe_av.c */ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av); - -int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr); - +int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr); +int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr); void rxe_av_from_attr(u8 port_num, struct rxe_av *av, struct rdma_ah_attr *attr); - void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr); - void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr); - struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp); /* rxe_cq.c */ @@ -68,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr); int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int access, struct rxe_mr *mr); int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr); +int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length); int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, enum rxe_mr_copy_dir dir); int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma, diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c index 9149b60954296004de0b1babb97e03fc0e6c1f47..a47d72dbc5376dd9239057ca659ce3f177549353 100644 --- a/drivers/infiniband/sw/rxe/rxe_mmap.c +++ b/drivers/infiniband/sw/rxe/rxe_mmap.c @@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) /* Don't allow a mmap larger than the object. */ if (size > ip->info.size) { - pr_err("mmap region is larger than the object!\n"); + rxe_dbg(rxe, "mmap region is larger than the object!\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma) goto found_it; } - pr_warn("unable to find pending mmap info\n"); + rxe_dbg(rxe, "unable to find pending mmap info\n"); spin_unlock_bh(&rxe->pending_lock); ret = -EINVAL; goto done; @@ -98,7 +98,7 @@ found_it: ret = remap_vmalloc_range(vma, ip->obj, 0); if (ret) { - pr_err("err %d from remap_vmalloc_range\n", ret); + rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret); goto done; } diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 502e9ada99b307a09291a5c38a6d02d155e2187e..072eac4b65d2966874ab3ab2958238727baa0472 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -4,6 +4,8 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ +#include + #include "rxe.h" #include "rxe_loc.h" @@ -26,7 +28,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) { - switch (mr->type) { + switch (mr->ibmr.type) { case IB_MR_TYPE_DMA: return 0; @@ -38,8 +40,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length) return 0; default: - pr_warn("%s: mr type (%d) not supported\n", - __func__, mr->type); + rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type); return -EFAULT; } } @@ -62,7 +63,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr) mr->rkey = mr->ibmr.rkey = rkey; mr->state = RXE_MR_STATE_INVALID; - mr->map_shift = ilog2(RXE_BUF_PER_MAP); } static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf) @@ -99,6 +99,7 @@ err2: kfree(mr->map[i]); kfree(mr->map); + mr->map = NULL; err1: return -ENOMEM; } @@ -109,7 +110,16 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr) mr->access = access; mr->state = RXE_MR_STATE_VALID; - mr->type = IB_MR_TYPE_DMA; + mr->ibmr.type = IB_MR_TYPE_DMA; +} + +static bool is_pmem_page(struct page *pg) +{ + unsigned long paddr = page_to_phys(pg); + + return REGION_INTERSECTS == + region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM, + IORES_DESC_PERSISTENT_MEMORY); } int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, @@ -122,12 +132,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, int num_buf; void *vaddr; int err; - int i; umem = ib_umem_get(&rxe->ib_dev, start, length, access); if (IS_ERR(umem)) { - pr_warn("%s: Unable to pin memory region err = %d\n", - __func__, (int)PTR_ERR(umem)); + rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n", + (int)PTR_ERR(umem)); err = PTR_ERR(umem); goto err_out; } @@ -138,8 +147,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, err = rxe_mr_alloc(mr, num_buf); if (err) { - pr_warn("%s: Unable to allocate memory for map\n", - __func__); + rxe_dbg_mr(mr, "Unable to allocate memory for map\n"); goto err_release_umem; } @@ -149,23 +157,30 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, num_buf = 0; map = mr->map; if (length > 0) { - buf = map[0]->buf; + bool persistent_access = access & IB_ACCESS_FLUSH_PERSISTENT; + buf = map[0]->buf; for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) { + struct page *pg = sg_page_iter_page(&sg_iter); + + if (persistent_access && !is_pmem_page(pg)) { + rxe_dbg_mr(mr, "Unable to register persistent access to non-pmem device\n"); + err = -EINVAL; + goto err_release_umem; + } + if (num_buf >= RXE_BUF_PER_MAP) { map++; buf = map[0]->buf; num_buf = 0; } - vaddr = page_address(sg_page_iter_page(&sg_iter)); + vaddr = page_address(pg); if (!vaddr) { - pr_warn("%s: Unable to get virtual address\n", - __func__); + rxe_dbg_mr(mr, "Unable to get virtual address\n"); err = -ENOMEM; - goto err_cleanup_map; + goto err_release_umem; } - buf->addr = (uintptr_t)vaddr; buf->size = PAGE_SIZE; num_buf++; @@ -178,14 +193,11 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova, mr->access = access; mr->offset = ib_umem_offset(umem); mr->state = RXE_MR_STATE_VALID; - mr->type = IB_MR_TYPE_USER; + mr->ibmr.type = IB_MR_TYPE_USER; + mr->ibmr.page_size = PAGE_SIZE; return 0; -err_cleanup_map: - for (i = 0; i < mr->num_map; i++) - kfree(mr->map[i]); - kfree(mr->map); err_release_umem: ib_umem_release(umem); err_out: @@ -205,7 +217,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr) mr->max_buf = max_pages; mr->state = RXE_MR_STATE_FREE; - mr->type = IB_MR_TYPE_MEM_REG; + mr->ibmr.type = IB_MR_TYPE_MEM_REG; return 0; @@ -256,7 +268,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) void *addr; if (mr->state != RXE_MR_STATE_VALID) { - pr_warn("mr not in valid state\n"); + rxe_dbg_mr(mr, "Not in valid state\n"); addr = NULL; goto out; } @@ -267,7 +279,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) } if (mr_check_range(mr, iova, length)) { - pr_warn("range violation\n"); + rxe_dbg_mr(mr, "Range violation\n"); addr = NULL; goto out; } @@ -275,7 +287,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length) lookup_iova(mr, iova, &m, &n, &offset); if (offset + length > mr->map[m]->buf[n].size) { - pr_warn("crosses page boundary\n"); + rxe_dbg_mr(mr, "Crosses page boundary\n"); addr = NULL; goto out; } @@ -286,6 +298,39 @@ out: return addr; } +int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length) +{ + size_t offset; + + if (length == 0) + return 0; + + if (mr->ibmr.type == IB_MR_TYPE_DMA) + return -EFAULT; + + offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask; + while (length > 0) { + u8 *va; + int bytes; + + bytes = mr->ibmr.page_size - offset; + if (bytes > length) + bytes = length; + + va = iova_to_vaddr(mr, iova, length); + if (!va) + return -EFAULT; + + arch_wb_cache_pmem(va, bytes); + + length -= bytes; + iova += bytes; + offset = 0; + } + + return 0; +} + /* copy data from a range (vaddr, vaddr+length-1) to or from * a mr object starting at iova. */ @@ -304,7 +349,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length, if (length == 0) return 0; - if (mr->type == IB_MR_TYPE_DMA) { + if (mr->ibmr.type == IB_MR_TYPE_DMA) { u8 *src, *dest; src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova); @@ -511,7 +556,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key, if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) || (type == RXE_LOOKUP_REMOTE && mr->rkey != key) || - mr_pd(mr) != pd || (access && !(access & mr->access)) || + mr_pd(mr) != pd || ((access & mr->access) != access) || mr->state != RXE_MR_STATE_VALID)) { rxe_put(mr); mr = NULL; @@ -528,27 +573,26 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key) mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8); if (!mr) { - pr_err("%s: No MR for key %#x\n", __func__, key); + rxe_dbg_qp(qp, "No MR for key %#x\n", key); ret = -EINVAL; goto err; } if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) { - pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n", - __func__, key, (mr->rkey ? mr->rkey : mr->lkey)); + rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n", + key, (mr->rkey ? mr->rkey : mr->lkey)); ret = -EINVAL; goto err_drop_ref; } if (atomic_read(&mr->num_mw) > 0) { - pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n", - __func__); + rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n"); ret = -EINVAL; goto err_drop_ref; } - if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) { - pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type); + if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) { + rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type); ret = -EINVAL; goto err_drop_ref; } @@ -577,22 +621,20 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe) /* user can only register MR in free state */ if (unlikely(mr->state != RXE_MR_STATE_FREE)) { - pr_warn("%s: mr->lkey = 0x%x not free\n", - __func__, mr->lkey); + rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey); return -EINVAL; } /* user can only register mr with qp in same protection domain */ if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) { - pr_warn("%s: qp->pd and mr->pd don't match\n", - __func__); + rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n"); return -EINVAL; } /* user is only allowed to change key portion of l/rkey */ if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) { - pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n", - __func__, key, mr->lkey); + rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n", + key, mr->lkey); return -EINVAL; } diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c index 902b7df7aaedb626ce33075da22e2cd3ebdbc685..afa5ce1a711667e90bc501f908ad49c00013564e 100644 --- a/drivers/infiniband/sw/rxe/rxe_mw.c +++ b/drivers/infiniband/sw/rxe/rxe_mw.c @@ -52,14 +52,14 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, { if (mw->ibmw.type == IB_MW_TYPE_1) { if (unlikely(mw->state != RXE_MW_STATE_VALID)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a type 1 MW not in the valid state\n"); return -EINVAL; } /* o10-36.2.2 */ if (unlikely((mw->access & IB_ZERO_BASED))) { - pr_err_once("attempt to bind a zero based type 1 MW\n"); + rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n"); return -EINVAL; } } @@ -67,21 +67,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (mw->ibmw.type == IB_MW_TYPE_2) { /* o10-37.2.30 */ if (unlikely(mw->state != RXE_MW_STATE_FREE)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a type 2 MW not in the free state\n"); return -EINVAL; } /* C10-72 */ if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind type 2 MW with qp with different PD\n"); return -EINVAL; } /* o10-37.2.40 */ if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to invalidate type 2 MW by binding with NULL or zero length MR\n"); return -EINVAL; } @@ -92,13 +92,13 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, return 0; if (unlikely(mr->access & IB_ZERO_BASED)) { - pr_err_once("attempt to bind MW to zero based MR\n"); + rxe_dbg_mw(mw, "attempt to bind MW to zero based MR\n"); return -EINVAL; } /* C10-73 */ if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind an MW to an MR without bind access\n"); return -EINVAL; } @@ -107,7 +107,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (unlikely((mw->access & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) && !(mr->access & IB_ACCESS_LOCAL_WRITE))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind an Writable MW to an MR without local write access\n"); return -EINVAL; } @@ -115,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, /* C10-75 */ if (mw->access & IB_ZERO_BASED) { if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a ZB MW outside of the MR\n"); return -EINVAL; } @@ -123,7 +123,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe, if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) || ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) > (mr->ibmr.iova + mr->ibmr.length)))) { - pr_err_once( + rxe_dbg_mw(mw, "attempt to bind a VA MW outside of the MR\n"); return -EINVAL; } @@ -293,8 +293,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey) if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd || (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) || - (mw->length == 0) || - (access && !(access & mw->access)) || + (mw->length == 0) || ((access & mw->access) != access) || mw->state != RXE_MW_STATE_VALID)) { rxe_put(mw); return NULL; diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c index 35f327b9d4b8ec28e3435a73139149742f8128c7..e02e1624bcf4db4a4fb11c62d396f0bc35a63e8b 100644 --- a/drivers/infiniband/sw/rxe/rxe_net.c +++ b/drivers/infiniband/sw/rxe/rxe_net.c @@ -20,9 +20,10 @@ static struct rxe_recv_sockets recv_sockets; -static struct dst_entry *rxe_find_route4(struct net_device *ndev, - struct in_addr *saddr, - struct in_addr *daddr) +static struct dst_entry *rxe_find_route4(struct rxe_qp *qp, + struct net_device *ndev, + struct in_addr *saddr, + struct in_addr *daddr) { struct rtable *rt; struct flowi4 fl = { { 0 } }; @@ -35,7 +36,7 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev, rt = ip_route_output_key(&init_net, &fl); if (IS_ERR(rt)) { - pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr); + rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr); return NULL; } @@ -43,7 +44,8 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *rxe_find_route6(struct net_device *ndev, +static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, + struct net_device *ndev, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -60,12 +62,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev, recv_sockets.sk6->sk, &fl6, NULL); if (IS_ERR(ndst)) { - pr_err_ratelimited("no route to %pI6\n", daddr); + rxe_dbg_qp(qp, "no route to %pI6\n", daddr); return NULL; } if (unlikely(ndst->error)) { - pr_err("no route to %pI6\n", daddr); + rxe_dbg_qp(qp, "no route to %pI6\n", daddr); goto put; } @@ -77,7 +79,8 @@ put: #else -static struct dst_entry *rxe_find_route6(struct net_device *ndev, +static struct dst_entry *rxe_find_route6(struct rxe_qp *qp, + struct net_device *ndev, struct in6_addr *saddr, struct in6_addr *daddr) { @@ -105,14 +108,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev, saddr = &av->sgid_addr._sockaddr_in.sin_addr; daddr = &av->dgid_addr._sockaddr_in.sin_addr; - dst = rxe_find_route4(ndev, saddr, daddr); + dst = rxe_find_route4(qp, ndev, saddr, daddr); } else if (av->network_type == RXE_NETWORK_TYPE_IPV6) { struct in6_addr *saddr6; struct in6_addr *daddr6; saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr; daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr; - dst = rxe_find_route6(ndev, saddr6, daddr6); + dst = rxe_find_route6(qp, ndev, saddr6, daddr6); #if IS_ENABLED(CONFIG_IPV6) if (dst) qp->dst_cookie = @@ -282,7 +285,7 @@ static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt, dst = rxe_find_route(skb->dev, qp, av); if (!dst) { - pr_err("Host not reachable\n"); + rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } @@ -306,7 +309,7 @@ static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt, dst = rxe_find_route(skb->dev, qp, av); if (!dst) { - pr_err("Host not reachable\n"); + rxe_dbg_qp(qp, "Host not reachable\n"); return -EHOSTUNREACH; } @@ -345,7 +348,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb) if (unlikely(qp->need_req_skb && skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW)) - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); rxe_put(qp); } @@ -365,7 +368,8 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } else if (skb->protocol == htons(ETH_P_IPV6)) { err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); } else { - pr_err("Unknown layer 3 protocol: %d\n", skb->protocol); + rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n", + skb->protocol); atomic_dec(&pkt->qp->skb_out); rxe_put(pkt->qp); kfree_skb(skb); @@ -373,7 +377,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt) } if (unlikely(net_xmit_eval(err))) { - pr_debug("error sending packet: %d\n", err); + rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err); return -EAGAIN; } @@ -411,7 +415,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, if ((is_request && (qp->req.state != QP_STATE_READY)) || (!is_request && (qp->resp.state != QP_STATE_READY))) { - pr_info("Packet dropped. QP is not in ready state\n"); + rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n"); goto drop; } @@ -429,7 +433,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt, if ((qp_type(qp) != IB_QPT_RC) && (pkt->mask & RXE_END_MASK)) { pkt->wqe->state = wqe_state_done; - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); } rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS); @@ -592,7 +596,7 @@ static int rxe_notify(struct notifier_block *not_blk, rxe_port_down(rxe); break; case NETDEV_CHANGEMTU: - pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu); + rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu); rxe_set_mtu(rxe, ndev->mtu); break; case NETDEV_CHANGE: @@ -604,7 +608,7 @@ static int rxe_notify(struct notifier_block *not_blk, case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: default: - pr_info("ignoring netdev event = %ld for %s\n", + rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n", event, ndev->name); break; } diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c index d4ba4d506f1760b9a04bcb6f35d6d5c2180c1b93..5c0d5c6ffda4f05949891058cf42eef1acfb15bf 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.c +++ b/drivers/infiniband/sw/rxe/rxe_opcode.c @@ -101,6 +101,18 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = { [IB_QPT_UC] = WR_LOCAL_OP_MASK, }, }, + [IB_WR_FLUSH] = { + .name = "IB_WR_FLUSH", + .mask = { + [IB_QPT_RC] = WR_FLUSH_MASK, + }, + }, + [IB_WR_ATOMIC_WRITE] = { + .name = "IB_WR_ATOMIC_WRITE", + .mask = { + [IB_QPT_RC] = WR_ATOMIC_WRITE_MASK, + }, + }, }; struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { @@ -378,6 +390,29 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = { RXE_IETH_BYTES, } }, + [IB_OPCODE_RC_FLUSH] = { + .name = "IB_OPCODE_RC_FLUSH", + .mask = RXE_FETH_MASK | RXE_RETH_MASK | RXE_FLUSH_MASK | + RXE_START_MASK | RXE_END_MASK | RXE_REQ_MASK, + .length = RXE_BTH_BYTES + RXE_FETH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_FETH] = RXE_BTH_BYTES, + [RXE_RETH] = RXE_BTH_BYTES + RXE_FETH_BYTES, + } + }, + [IB_OPCODE_RC_ATOMIC_WRITE] = { + .name = "IB_OPCODE_RC_ATOMIC_WRITE", + .mask = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK | + RXE_ATOMIC_WRITE_MASK | RXE_START_MASK | + RXE_END_MASK, + .length = RXE_BTH_BYTES + RXE_RETH_BYTES, + .offset = { + [RXE_BTH] = 0, + [RXE_RETH] = RXE_BTH_BYTES, + [RXE_PAYLOAD] = RXE_BTH_BYTES + RXE_RETH_BYTES, + } + }, /* UC */ [IB_OPCODE_UC_SEND_FIRST] = { diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h index 8f9aaaf260f293c0fab54ba08f74cd0e839a608d..cea4e0a639199cd9d2aa829aa36f2415d845e359 100644 --- a/drivers/infiniband/sw/rxe/rxe_opcode.h +++ b/drivers/infiniband/sw/rxe/rxe_opcode.h @@ -20,6 +20,8 @@ enum rxe_wr_mask { WR_READ_MASK = BIT(3), WR_WRITE_MASK = BIT(4), WR_LOCAL_OP_MASK = BIT(5), + WR_FLUSH_MASK = BIT(6), + WR_ATOMIC_WRITE_MASK = BIT(7), WR_READ_OR_WRITE_MASK = WR_READ_MASK | WR_WRITE_MASK, WR_WRITE_OR_SEND_MASK = WR_WRITE_MASK | WR_SEND_MASK, @@ -47,6 +49,7 @@ enum rxe_hdr_type { RXE_RDETH, RXE_DETH, RXE_IMMDT, + RXE_FETH, RXE_PAYLOAD, NUM_HDR_TYPES }; @@ -63,6 +66,7 @@ enum rxe_hdr_mask { RXE_IETH_MASK = BIT(RXE_IETH), RXE_RDETH_MASK = BIT(RXE_RDETH), RXE_DETH_MASK = BIT(RXE_DETH), + RXE_FETH_MASK = BIT(RXE_FETH), RXE_PAYLOAD_MASK = BIT(RXE_PAYLOAD), RXE_REQ_MASK = BIT(NUM_HDR_TYPES + 0), @@ -71,16 +75,19 @@ enum rxe_hdr_mask { RXE_WRITE_MASK = BIT(NUM_HDR_TYPES + 3), RXE_READ_MASK = BIT(NUM_HDR_TYPES + 4), RXE_ATOMIC_MASK = BIT(NUM_HDR_TYPES + 5), + RXE_FLUSH_MASK = BIT(NUM_HDR_TYPES + 6), - RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 6), - RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 7), + RXE_RWR_MASK = BIT(NUM_HDR_TYPES + 7), + RXE_COMP_MASK = BIT(NUM_HDR_TYPES + 8), - RXE_START_MASK = BIT(NUM_HDR_TYPES + 8), - RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 9), - RXE_END_MASK = BIT(NUM_HDR_TYPES + 10), + RXE_START_MASK = BIT(NUM_HDR_TYPES + 9), + RXE_MIDDLE_MASK = BIT(NUM_HDR_TYPES + 10), + RXE_END_MASK = BIT(NUM_HDR_TYPES + 11), RXE_LOOPBACK_MASK = BIT(NUM_HDR_TYPES + 12), + RXE_ATOMIC_WRITE_MASK = BIT(NUM_HDR_TYPES + 14), + RXE_READ_OR_ATOMIC_MASK = (RXE_READ_MASK | RXE_ATOMIC_MASK), RXE_WRITE_OR_SEND_MASK = (RXE_WRITE_MASK | RXE_SEND_MASK), RXE_READ_OR_WRITE_MASK = (RXE_READ_MASK | RXE_WRITE_MASK), diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index 86c7a8bf3cbbd8c8dc0f61e0c0afaf9b6f7978b8..a754fc902e3d190c1fbdf7ab3ecdeb59fef266de 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -51,7 +51,14 @@ enum rxe_device_param { | IB_DEVICE_SRQ_RESIZE | IB_DEVICE_MEM_MGT_EXTENSIONS | IB_DEVICE_MEM_WINDOW + | IB_DEVICE_FLUSH_GLOBAL + | IB_DEVICE_FLUSH_PERSISTENT +#ifdef CONFIG_64BIT + | IB_DEVICE_MEM_WINDOW_TYPE_2B + | IB_DEVICE_ATOMIC_WRITE, +#else | IB_DEVICE_MEM_WINDOW_TYPE_2B, +#endif /* CONFIG_64BIT */ RXE_MAX_SGE = 32, RXE_MAX_WQE_SIZE = sizeof(struct rxe_send_wqe) + sizeof(struct ib_sge) * RXE_MAX_SGE, diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c index a62bab88415cbff117ee50e8fdd921594278e8b9..ab72db68b58f69cb65a5b69d8e7d932d965fc554 100644 --- a/drivers/infiniband/sw/rxe/rxe_qp.c +++ b/drivers/infiniband/sw/rxe/rxe_qp.c @@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap, int has_srq) { if (cap->max_send_wr > rxe->attr.max_qp_wr) { - pr_debug("invalid send wr = %u > %d\n", + rxe_dbg(rxe, "invalid send wr = %u > %d\n", cap->max_send_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_send_sge > rxe->attr.max_send_sge) { - pr_debug("invalid send sge = %u > %d\n", + rxe_dbg(rxe, "invalid send sge = %u > %d\n", cap->max_send_sge, rxe->attr.max_send_sge); goto err1; } if (!has_srq) { if (cap->max_recv_wr > rxe->attr.max_qp_wr) { - pr_debug("invalid recv wr = %u > %d\n", + rxe_dbg(rxe, "invalid recv wr = %u > %d\n", cap->max_recv_wr, rxe->attr.max_qp_wr); goto err1; } if (cap->max_recv_sge > rxe->attr.max_recv_sge) { - pr_debug("invalid recv sge = %u > %d\n", + rxe_dbg(rxe, "invalid recv sge = %u > %d\n", cap->max_recv_sge, rxe->attr.max_recv_sge); goto err1; } } if (cap->max_inline_data > rxe->max_inline_data) { - pr_debug("invalid max inline data = %u > %d\n", + rxe_dbg(rxe, "invalid max inline data = %u > %d\n", cap->max_inline_data, rxe->max_inline_data); goto err1; } @@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) } if (!init->recv_cq || !init->send_cq) { - pr_debug("missing cq\n"); + rxe_dbg(rxe, "missing cq\n"); goto err1; } @@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init) if (init->qp_type == IB_QPT_GSI) { if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) { - pr_debug("invalid port = %d\n", port_num); + rxe_dbg(rxe, "invalid port = %d\n", port_num); goto err1; } port = &rxe->port; if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) { - pr_debug("GSI QP exists for port %d\n", port_num); + rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num); goto err1; } } @@ -172,10 +172,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp, spin_lock_init(&qp->state_lock); - spin_lock_init(&qp->req.task.state_lock); - spin_lock_init(&qp->resp.task.state_lock); - spin_lock_init(&qp->comp.task.state_lock); - spin_lock_init(&qp->sq.sq_lock); spin_lock_init(&qp->rq.producer_lock); spin_lock_init(&qp->rq.consumer_lock); @@ -242,10 +238,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->req_pkts); - rxe_init_task(&qp->req.task, qp, - rxe_requester, "req"); - rxe_init_task(&qp->comp.task, qp, - rxe_completer, "comp"); + rxe_init_task(&qp->req.task, qp, rxe_requester); + rxe_init_task(&qp->comp.task, qp, rxe_completer); qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */ if (init->qp_type == IB_QPT_RC) { @@ -270,9 +264,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, wqe_size = rcv_wqe_size(qp->rq.max_sge); - pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n", - qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size); - type = QUEUE_TYPE_FROM_CLIENT; qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr, wqe_size, type); @@ -292,8 +283,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp, skb_queue_head_init(&qp->resp_pkts); - rxe_init_task(&qp->resp.task, qp, - rxe_responder, "resp"); + rxe_init_task(&qp->resp.task, qp, rxe_responder); qp->resp.opcode = OPCODE_NONE; qp->resp.msn = 0; @@ -402,7 +392,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, attr->qp_state : cur_state; if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) { - pr_debug("invalid mask or state for qp\n"); + rxe_dbg_qp(qp, "invalid mask or state\n"); goto err1; } @@ -416,7 +406,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_PORT) { if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) { - pr_debug("invalid port %d\n", attr->port_num); + rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num); goto err1; } } @@ -424,18 +414,18 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq)) goto err1; - if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr)) + if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr)) goto err1; if (mask & IB_QP_ALT_PATH) { - if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr)) + if (rxe_av_chk_attr(qp, &attr->alt_ah_attr)) goto err1; if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num)) { - pr_debug("invalid alt port %d\n", attr->alt_port_num); + rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num); goto err1; } if (attr->alt_timeout > 31) { - pr_debug("invalid QP alt timeout %d > 31\n", + rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n", attr->alt_timeout); goto err1; } @@ -448,7 +438,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, enum ib_mtu mtu = attr->path_mtu; if (mtu > max_mtu) { - pr_debug("invalid mtu (%d) > (%d)\n", + rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n", ib_mtu_enum_to_int(mtu), ib_mtu_enum_to_int(max_mtu)); goto err1; @@ -457,7 +447,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_MAX_QP_RD_ATOMIC) { if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) { - pr_debug("invalid max_rd_atomic %d > %d\n", + rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n", attr->max_rd_atomic, rxe->attr.max_qp_rd_atom); goto err1; @@ -466,7 +456,8 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp, if (mask & IB_QP_TIMEOUT) { if (attr->timeout > 31) { - pr_debug("invalid QP timeout %d > 31\n", attr->timeout); + rxe_dbg_qp(qp, "invalid timeout %d > 31\n", + attr->timeout); goto err1; } } @@ -543,10 +534,10 @@ static void rxe_qp_drain(struct rxe_qp *qp) if (qp->req.state != QP_STATE_DRAINED) { qp->req.state = QP_STATE_DRAIN; if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); else __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } } } @@ -560,13 +551,13 @@ void rxe_qp_error(struct rxe_qp *qp) qp->attr.qp_state = IB_QPS_ERR; /* drain work and packet queues */ - rxe_run_task(&qp->resp.task, 1); + rxe_sched_task(&qp->resp.task); if (qp_type(qp) == IB_QPT_RC) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); else __rxe_do_task(&qp->comp.task); - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } /* called by the modify qp verb */ @@ -644,27 +635,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, if (mask & IB_QP_RETRY_CNT) { qp->attr.retry_cnt = attr->retry_cnt; qp->comp.retry_cnt = attr->retry_cnt; - pr_debug("qp#%d set retry count = %d\n", qp_num(qp), - attr->retry_cnt); + rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt); } if (mask & IB_QP_RNR_RETRY) { qp->attr.rnr_retry = attr->rnr_retry; qp->comp.rnr_retry = attr->rnr_retry; - pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp), - attr->rnr_retry); + rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry); } if (mask & IB_QP_RQ_PSN) { qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK); qp->resp.psn = qp->attr.rq_psn; - pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp), - qp->resp.psn); + rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn); } if (mask & IB_QP_MIN_RNR_TIMER) { qp->attr.min_rnr_timer = attr->min_rnr_timer; - pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp), + rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n", attr->min_rnr_timer); } @@ -672,7 +660,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK); qp->req.psn = qp->attr.sq_psn; qp->comp.psn = qp->attr.sq_psn; - pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn); + rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn); } if (mask & IB_QP_PATH_MIG_STATE) @@ -686,40 +674,40 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask, switch (attr->qp_state) { case IB_QPS_RESET: - pr_debug("qp#%d state -> RESET\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RESET\n"); rxe_qp_reset(qp); break; case IB_QPS_INIT: - pr_debug("qp#%d state -> INIT\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> INIT\n"); qp->req.state = QP_STATE_INIT; qp->resp.state = QP_STATE_INIT; qp->comp.state = QP_STATE_INIT; break; case IB_QPS_RTR: - pr_debug("qp#%d state -> RTR\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RTR\n"); qp->resp.state = QP_STATE_READY; break; case IB_QPS_RTS: - pr_debug("qp#%d state -> RTS\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> RTS\n"); qp->req.state = QP_STATE_READY; qp->comp.state = QP_STATE_READY; break; case IB_QPS_SQD: - pr_debug("qp#%d state -> SQD\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> SQD\n"); rxe_qp_drain(qp); break; case IB_QPS_SQE: - pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> SQE !!?\n"); /* Not possible from modify_qp. */ break; case IB_QPS_ERR: - pr_debug("qp#%d state -> ERR\n", qp_num(qp)); + rxe_dbg_qp(qp, "state -> ERR\n"); rxe_qp_error(qp); break; } @@ -759,7 +747,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask) attr->sq_draining = 0; } - pr_debug("attr->sq_draining = %d\n", attr->sq_draining); + rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining); return 0; } @@ -771,7 +759,7 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp) * will fail immediately. */ if (atomic_read(&qp->mcg_num)) { - pr_debug("Attempt to destroy QP while attached to multicast group\n"); + rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n"); return -EBUSY; } @@ -829,12 +817,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work) if (qp->resp.mr) rxe_put(qp->resp.mr); - if (qp_type(qp) == IB_QPT_RC) - sk_dst_reset(qp->sk->sk); - free_rd_atomic_resources(qp); if (qp->sk) { + if (qp_type(qp) == IB_QPT_RC) + sk_dst_reset(qp->sk->sk); + kernel_sock_shutdown(qp->sk, SHUT_RDWR); sock_release(qp->sk); } diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c index f637712079705f85f7f4c3547c90b5284ce81cfe..899c8779f8001cd21bb16f8726e8b61cc611d50b 100644 --- a/drivers/infiniband/sw/rxe/rxe_req.c +++ b/drivers/infiniband/sw/rxe/rxe_req.c @@ -100,12 +100,12 @@ void rnr_nak_timer(struct timer_list *t) { struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer); - pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp)); + rxe_dbg_qp(qp, "nak timer fired\n"); /* request a send queue retry */ qp->req.need_retry = 1; qp->req.wait_for_rnr_timer = 0; - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); } static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp) @@ -241,6 +241,9 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE : IB_OPCODE_RC_SEND_FIRST; + case IB_WR_FLUSH: + return IB_OPCODE_RC_FLUSH; + case IB_WR_RDMA_READ: return IB_OPCODE_RC_RDMA_READ_REQUEST; @@ -258,6 +261,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits) else return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE : IB_OPCODE_RC_SEND_FIRST; + + case IB_WR_ATOMIC_WRITE: + return IB_OPCODE_RC_ATOMIC_WRITE; + case IB_WR_REG_MR: case IB_WR_LOCAL_INV: return opcode; @@ -421,11 +428,18 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp, /* init optional headers */ if (pkt->mask & RXE_RETH_MASK) { - reth_set_rkey(pkt, ibwr->wr.rdma.rkey); + if (pkt->mask & RXE_FETH_MASK) + reth_set_rkey(pkt, ibwr->wr.flush.rkey); + else + reth_set_rkey(pkt, ibwr->wr.rdma.rkey); reth_set_va(pkt, wqe->iova); reth_set_len(pkt, wqe->dma.resid); } + /* Fill Flush Extension Transport Header */ + if (pkt->mask & RXE_FETH_MASK) + feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level); + if (pkt->mask & RXE_IMMDT_MASK) immdt_set_imm(pkt, ibwr->ex.imm_data); @@ -484,6 +498,14 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av, memset(pad, 0, bth_pad(pkt)); } + } else if (pkt->mask & RXE_FLUSH_MASK) { + /* oA19-2: shall have no payload. */ + wqe->dma.resid = 0; + } + + if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload); + wqe->dma.resid -= payload; } return 0; @@ -595,7 +617,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) } break; default: - pr_err("Unexpected send wqe opcode %d\n", opcode); + rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode); wqe->status = IB_WC_LOC_QP_OP_ERR; return -EINVAL; } @@ -608,7 +630,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe) * which can lead to a deadlock. So go ahead and complete * it now. */ - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); return 0; } @@ -709,13 +731,15 @@ int rxe_requester(void *arg) } mask = rxe_opcode[opcode].mask; - if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) { + if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK | + RXE_ATOMIC_WRITE_MASK))) { if (check_init_depth(qp, wqe)) goto exit; } mtu = get_mtu(qp); - payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0; + payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ? + wqe->dma.resid : 0; if (payload > mtu) { if (qp_type(qp) == IB_QPT_UD) { /* C10-93.1.1: If the total sum of all the buffer lengths specified for a @@ -733,7 +757,7 @@ int rxe_requester(void *arg) qp->req.wqe_index); wqe->state = wqe_state_done; wqe->status = IB_WC_SUCCESS; - rxe_run_task(&qp->comp.task, 0); + rxe_run_task(&qp->comp.task); goto done; } payload = mtu; @@ -748,14 +772,14 @@ int rxe_requester(void *arg) av = rxe_get_av(&pkt, &ah); if (unlikely(!av)) { - pr_err("qp#%d Failed no address vector\n", qp_num(qp)); + rxe_dbg_qp(qp, "Failed no address vector\n"); wqe->status = IB_WC_LOC_QP_OP_ERR; goto err; } skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt); if (unlikely(!skb)) { - pr_err("qp#%d Failed allocating skb\n", qp_num(qp)); + rxe_dbg_qp(qp, "Failed allocating skb\n"); wqe->status = IB_WC_LOC_QP_OP_ERR; if (ah) rxe_put(ah); @@ -764,7 +788,7 @@ int rxe_requester(void *arg) err = finish_packet(qp, av, wqe, &pkt, skb, payload); if (unlikely(err)) { - pr_debug("qp#%d Error during finish packet\n", qp_num(qp)); + rxe_dbg_qp(qp, "Error during finish packet\n"); if (err == -EFAULT) wqe->status = IB_WC_LOC_PROT_ERR; else @@ -795,7 +819,7 @@ int rxe_requester(void *arg) rollback_state(wqe, qp, &rollback_wqe, rollback_psn); if (err == -EAGAIN) { - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); goto exit; } @@ -817,7 +841,7 @@ err: qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index); wqe->state = wqe_state_error; qp->req.state = QP_STATE_ERROR; - rxe_run_task(&qp->comp.task, 0); + rxe_run_task(&qp->comp.task); exit: ret = -EAGAIN; out: diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 693081e813ec08c8d37619da75c84a82a4d5acfa..7a60c7709da045510f14d01005c63e74724cad28 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -22,6 +22,8 @@ enum resp_states { RESPST_EXECUTE, RESPST_READ_REPLY, RESPST_ATOMIC_REPLY, + RESPST_ATOMIC_WRITE_REPLY, + RESPST_PROCESS_FLUSH, RESPST_COMPLETE, RESPST_ACKNOWLEDGE, RESPST_CLEANUP, @@ -57,6 +59,8 @@ static char *resp_state_name[] = { [RESPST_EXECUTE] = "EXECUTE", [RESPST_READ_REPLY] = "READ_REPLY", [RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY", + [RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY", + [RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH", [RESPST_COMPLETE] = "COMPLETE", [RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE", [RESPST_CLEANUP] = "CLEANUP", @@ -91,7 +95,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb) must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) || (skb_queue_len(&qp->req_pkts) > 1); - rxe_run_task(&qp->resp.task, must_sched); + if (must_sched) + rxe_sched_task(&qp->resp.task); + else + rxe_run_task(&qp->resp.task); } static inline enum resp_states get_req(struct rxe_qp *qp, @@ -253,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp, } } +static bool check_qp_attr_access(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + if (((pkt->mask & RXE_READ_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || + ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || + ((pkt->mask & RXE_ATOMIC_MASK) && + !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) + return false; + + if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if ((flush_type & IB_FLUSH_GLOBAL && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) || + (flush_type & IB_FLUSH_PERSISTENT && + !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT))) + return false; + } + + return true; +} + static enum resp_states check_op_valid(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { switch (qp_type(qp)) { case IB_QPT_RC: - if (((pkt->mask & RXE_READ_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) || - ((pkt->mask & RXE_WRITE_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) || - ((pkt->mask & RXE_ATOMIC_MASK) && - !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) { + if (!check_qp_attr_access(qp, pkt)) return RESPST_ERR_UNSUPPORTED_OPCODE; - } break; @@ -314,7 +339,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp) /* don't trust user space data */ if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) { spin_unlock_irqrestore(&srq->rq.consumer_lock, flags); - pr_warn("%s: invalid num_sge in SRQ entry\n", __func__); + rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n"); return RESPST_ERR_MALFORMED_WQE; } size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge); @@ -364,7 +389,7 @@ static enum resp_states check_resource(struct rxe_qp *qp, } } - if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) { + if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) { /* it is the requesters job to not send * too many read/atomic ops, we just * recycle the responder resource queue @@ -387,19 +412,66 @@ static enum resp_states check_resource(struct rxe_qp *qp, return RESPST_CHK_LENGTH; } -static enum resp_states check_length(struct rxe_qp *qp, - struct rxe_pkt_info *pkt) +static enum resp_states rxe_resp_check_length(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) { - switch (qp_type(qp)) { - case IB_QPT_RC: - return RESPST_CHK_RKEY; - - case IB_QPT_UC: - return RESPST_CHK_RKEY; + /* + * See IBA C9-92 + * For UD QPs we only check if the packet will fit in the + * receive buffer later. For rmda operations additional + * length checks are performed in check_rkey. + */ + if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) || + (qp_type(qp) == IB_QPT_UC))) { + unsigned int mtu = qp->mtu; + unsigned int payload = payload_size(pkt); + + if ((pkt->mask & RXE_START_MASK) && + (pkt->mask & RXE_END_MASK)) { + if (unlikely(payload > mtu)) { + rxe_dbg_qp(qp, "only packet too long"); + return RESPST_ERR_LENGTH; + } + } else if ((pkt->mask & RXE_START_MASK) || + (pkt->mask & RXE_MIDDLE_MASK)) { + if (unlikely(payload != mtu)) { + rxe_dbg_qp(qp, "first or middle packet not mtu"); + return RESPST_ERR_LENGTH; + } + } else if (pkt->mask & RXE_END_MASK) { + if (unlikely((payload == 0) || (payload > mtu))) { + rxe_dbg_qp(qp, "last packet zero or too long"); + return RESPST_ERR_LENGTH; + } + } + } - default: - return RESPST_CHK_RKEY; + /* See IBA C9-94 */ + if (pkt->mask & RXE_RETH_MASK) { + if (reth_len(pkt) > (1U << 31)) { + rxe_dbg_qp(qp, "dma length too long"); + return RESPST_ERR_LENGTH; + } } + + return RESPST_CHK_RKEY; +} + +static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->resp.va = reth_va(pkt); + qp->resp.offset = 0; + qp->resp.rkey = reth_rkey(pkt); + qp->resp.resid = reth_len(pkt); + qp->resp.length = reth_len(pkt); +} + +static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt) +{ + qp->resp.va = atmeth_va(pkt); + qp->resp.offset = 0; + qp->resp.rkey = atmeth_rkey(pkt); + qp->resp.resid = sizeof(u64); } static enum resp_states check_rkey(struct rxe_qp *qp, @@ -413,29 +485,32 @@ static enum resp_states check_rkey(struct rxe_qp *qp, u32 pktlen; int mtu = qp->mtu; enum resp_states state; - int access; - - if (pkt->mask & RXE_READ_OR_WRITE_MASK) { - if (pkt->mask & RXE_RETH_MASK) { - qp->resp.va = reth_va(pkt); - qp->resp.offset = 0; - qp->resp.rkey = reth_rkey(pkt); - qp->resp.resid = reth_len(pkt); - qp->resp.length = reth_len(pkt); - } + int access = 0; + + if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ : IB_ACCESS_REMOTE_WRITE; + } else if (pkt->mask & RXE_FLUSH_MASK) { + u32 flush_type = feth_plt(pkt); + + if (pkt->mask & RXE_RETH_MASK) + qp_resp_from_reth(qp, pkt); + + if (flush_type & IB_FLUSH_GLOBAL) + access |= IB_ACCESS_FLUSH_GLOBAL; + if (flush_type & IB_FLUSH_PERSISTENT) + access |= IB_ACCESS_FLUSH_PERSISTENT; } else if (pkt->mask & RXE_ATOMIC_MASK) { - qp->resp.va = atmeth_va(pkt); - qp->resp.offset = 0; - qp->resp.rkey = atmeth_rkey(pkt); - qp->resp.resid = sizeof(u64); + qp_resp_from_atmeth(qp, pkt); access = IB_ACCESS_REMOTE_ATOMIC; } else { return RESPST_EXECUTE; } - /* A zero-byte op is not required to set an addr or rkey. */ + /* A zero-byte op is not required to set an addr or rkey. See C9-88 */ if ((pkt->mask & RXE_READ_OR_WRITE_MASK) && (pkt->mask & RXE_RETH_MASK) && reth_len(pkt) == 0) { @@ -450,15 +525,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp, if (rkey_is_mw(rkey)) { mw = rxe_lookup_mw(qp, access, rkey); if (!mw) { - pr_debug("%s: no MW matches rkey %#x\n", - __func__, rkey); + rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } mr = mw->mr; if (!mr) { - pr_err("%s: MW doesn't have an MR\n", __func__); + rxe_dbg_qp(qp, "MW doesn't have an MR\n"); state = RESPST_ERR_RKEY_VIOLATION; goto err; } @@ -471,19 +545,27 @@ static enum resp_states check_rkey(struct rxe_qp *qp, } else { mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE); if (!mr) { - pr_debug("%s: no MR matches rkey %#x\n", - __func__, rkey); + rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey); state = RESPST_ERR_RKEY_VIOLATION; goto err; } } + if (pkt->mask & RXE_FLUSH_MASK) { + /* FLUSH MR may not set va or resid + * no need to check range since we will flush whole mr + */ + if (feth_sel(pkt) == IB_FLUSH_MR) + goto skip_check_range; + } + if (mr_check_range(mr, va + qp->resp.offset, resid)) { state = RESPST_ERR_RKEY_VIOLATION; goto err; } - if (pkt->mask & RXE_WRITE_MASK) { +skip_check_range: + if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) { if (resid > mtu) { if (pktlen != mtu || bth_pad(pkt)) { state = RESPST_ERR_LENGTH; @@ -583,15 +665,66 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp, res->state = rdatm_res_state_new; break; case RXE_ATOMIC_MASK: + case RXE_ATOMIC_WRITE_MASK: res->first_psn = pkt->psn; res->last_psn = pkt->psn; res->cur_psn = pkt->psn; break; + case RXE_FLUSH_MASK: + res->flush.va = qp->resp.va + qp->resp.offset; + res->flush.length = qp->resp.length; + res->flush.type = feth_plt(pkt); + res->flush.level = feth_sel(pkt); } return res; } +static enum resp_states process_flush(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 length, start; + struct rxe_mr *mr = qp->resp.mr; + struct resp_res *res = qp->resp.res; + + /* oA19-14, oA19-15 */ + if (res && res->replay) + return RESPST_ACKNOWLEDGE; + else if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK); + qp->resp.res = res; + } + + if (res->flush.level == IB_FLUSH_RANGE) { + start = res->flush.va; + length = res->flush.length; + } else { /* level == IB_FLUSH_MR */ + start = mr->ibmr.iova; + length = mr->ibmr.length; + } + + if (res->flush.type & IB_FLUSH_PERSISTENT) { + if (rxe_flush_pmem_iova(mr, start, length)) + return RESPST_ERR_RKEY_VIOLATION; + /* Make data persistent. */ + wmb(); + } else if (res->flush.type & IB_FLUSH_GLOBAL) { + /* Make data global visibility. */ + wmb(); + } + + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + return RESPST_ACKNOWLEDGE; +} + /* Guarantee atomicity of atomic operations at the machine level. */ static DEFINE_SPINLOCK(atomic_ops_lock); @@ -652,6 +785,55 @@ out: return ret; } +static enum resp_states atomic_write_reply(struct rxe_qp *qp, + struct rxe_pkt_info *pkt) +{ + u64 src, *dst; + struct resp_res *res = qp->resp.res; + struct rxe_mr *mr = qp->resp.mr; + int payload = payload_size(pkt); + + if (!res) { + res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK); + qp->resp.res = res; + } + + if (!res->replay) { +#ifdef CONFIG_64BIT + if (mr->state != RXE_MR_STATE_VALID) + return RESPST_ERR_RKEY_VIOLATION; + + memcpy(&src, payload_addr(pkt), payload); + + dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload); + /* check vaddr is 8 bytes aligned. */ + if (!dst || (uintptr_t)dst & 7) + return RESPST_ERR_MISALIGNED_ATOMIC; + + /* Do atomic write after all prior operations have completed */ + smp_store_release(dst, src); + + /* decrease resp.resid to zero */ + qp->resp.resid -= sizeof(payload); + + qp->resp.msn++; + + /* next expected psn, read handles this separately */ + qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK; + qp->resp.ack_psn = qp->resp.psn; + + qp->resp.opcode = pkt->opcode; + qp->resp.status = IB_WC_SUCCESS; + + return RESPST_ACKNOWLEDGE; +#else + return RESPST_ERR_UNSUPPORTED_OPCODE; +#endif /* CONFIG_64BIT */ + } + + return RESPST_ACKNOWLEDGE; +} + static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp, struct rxe_pkt_info *ack, int opcode, @@ -807,14 +989,19 @@ static enum resp_states read_reply(struct rxe_qp *qp, skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload, res->cur_psn, AETH_ACK_UNLIMITED); if (!skb) { - rxe_put(mr); + if (mr) + rxe_put(mr); return RESPST_ERR_RNR; } - rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), - payload, RXE_FROM_MR_OBJ); + err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt), + payload, RXE_FROM_MR_OBJ); if (mr) rxe_put(mr); + if (err) { + kfree_skb(skb); + return RESPST_ERR_RKEY_VIOLATION; + } if (bth_pad(&ack_pkt)) { u8 *pad = payload_addr(&ack_pkt) + payload; @@ -890,6 +1077,10 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt) return RESPST_READ_REPLY; } else if (pkt->mask & RXE_ATOMIC_MASK) { return RESPST_ATOMIC_REPLY; + } else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) { + return RESPST_ATOMIC_WRITE_REPLY; + } else if (pkt->mask & RXE_FLUSH_MASK) { + return RESPST_PROCESS_FLUSH; } else { /* Unreachable */ WARN_ON_ONCE(1); @@ -1040,7 +1231,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn, err = rxe_xmit_packet(qp, &ack_pkt, skb); if (err) - pr_err_ratelimited("Failed sending %s\n", msg); + rxe_dbg_qp(qp, "Failed sending %s\n", msg); return err; } @@ -1063,6 +1254,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) return ret; } +static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn) +{ + int ret = send_common_ack(qp, syndrome, psn, + IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY, + "RDMA READ response of length zero ACK"); + + /* have to clear this since it is used to trigger + * long read replies + */ + qp->resp.res = NULL; + return ret; +} + static enum resp_states acknowledge(struct rxe_qp *qp, struct rxe_pkt_info *pkt) { @@ -1073,6 +1277,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp, send_ack(qp, qp->resp.aeth_syndrome, pkt->psn); else if (pkt->mask & RXE_ATOMIC_MASK) send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); + else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK)) + send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); else if (bth_ack(pkt)) send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn); @@ -1129,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, /* SEND. Ack again and cleanup. C9-105. */ send_ack(qp, AETH_ACK_UNLIMITED, prev_psn); return RESPST_CLEANUP; + } else if (pkt->mask & RXE_FLUSH_MASK) { + struct resp_res *res; + + /* Find the operation in our list of responder resources. */ + res = find_resource(qp, pkt->psn); + if (res) { + res->replay = 1; + res->cur_psn = pkt->psn; + qp->resp.res = res; + rc = RESPST_PROCESS_FLUSH; + goto out; + } + + /* Resource not found. Class D error. Drop the request. */ + rc = RESPST_CLEANUP; + goto out; } else if (pkt->mask & RXE_READ_MASK) { struct resp_res *res; @@ -1184,7 +1406,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp, res->replay = 1; res->cur_psn = pkt->psn; qp->resp.res = res; - rc = RESPST_ATOMIC_REPLY; + rc = pkt->mask & RXE_ATOMIC_MASK ? + RESPST_ATOMIC_REPLY : + RESPST_ATOMIC_WRITE_REPLY; goto out; } @@ -1286,8 +1510,7 @@ int rxe_responder(void *arg) } while (1) { - pr_debug("qp#%d state = %s\n", qp_num(qp), - resp_state_name[state]); + rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]); switch (state) { case RESPST_GET_REQ: state = get_req(qp, &pkt); @@ -1305,7 +1528,7 @@ int rxe_responder(void *arg) state = check_resource(qp, pkt); break; case RESPST_CHK_LENGTH: - state = check_length(qp, pkt); + state = rxe_resp_check_length(qp, pkt); break; case RESPST_CHK_RKEY: state = check_rkey(qp, pkt); @@ -1322,6 +1545,12 @@ int rxe_responder(void *arg) case RESPST_ATOMIC_REPLY: state = atomic_reply(qp, pkt); break; + case RESPST_ATOMIC_WRITE_REPLY: + state = atomic_write_reply(qp, pkt); + break; + case RESPST_PROCESS_FLUSH: + state = process_flush(qp, pkt); + break; case RESPST_ACKNOWLEDGE: state = acknowledge(qp, pkt); break; @@ -1444,7 +1673,7 @@ int rxe_responder(void *arg) case RESPST_ERROR: qp->resp.goto_error = 0; - pr_debug("qp#%d moved to error state\n", qp_num(qp)); + rxe_dbg_qp(qp, "moved to error state\n"); rxe_qp_error(qp); goto exit; diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c index 02b39498c370d24e53bb61a8a2b79cf7b01c9432..82e37a41ced40def8626a5ff1560afb472ed14d3 100644 --- a/drivers/infiniband/sw/rxe/rxe_srq.c +++ b/drivers/infiniband/sw/rxe/rxe_srq.c @@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) struct ib_srq_attr *attr = &init->attr; if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n", attr->max_wr, rxe->attr.max_srq_wr); goto err1; } if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr); goto err1; } @@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init) attr->max_wr = RXE_MIN_SRQ_WR; if (attr->max_sge > rxe->attr.max_srq_sge) { - pr_warn("max_sge(%d) > max_srq_sge(%d)\n", + rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n", attr->max_sge, rxe->attr.max_srq_sge); goto err1; } @@ -65,7 +65,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq, type = QUEUE_TYPE_FROM_CLIENT; q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type); if (!q) { - pr_warn("unable to allocate queue for srq\n"); + rxe_dbg_srq(srq, "Unable to allocate queue\n"); return -ENOMEM; } @@ -94,24 +94,24 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask) { if (srq->error) { - pr_warn("srq in error state\n"); + rxe_dbg_srq(srq, "in error state\n"); goto err1; } if (mask & IB_SRQ_MAX_WR) { if (attr->max_wr > rxe->attr.max_srq_wr) { - pr_warn("max_wr(%d) > max_srq_wr(%d)\n", + rxe_dbg_srq(srq, "max_wr(%d) > max_srq_wr(%d)\n", attr->max_wr, rxe->attr.max_srq_wr); goto err1; } if (attr->max_wr <= 0) { - pr_warn("max_wr(%d) <= 0\n", attr->max_wr); + rxe_dbg_srq(srq, "max_wr(%d) <= 0\n", attr->max_wr); goto err1; } if (srq->limit && (attr->max_wr < srq->limit)) { - pr_warn("max_wr (%d) < srq->limit (%d)\n", + rxe_dbg_srq(srq, "max_wr (%d) < srq->limit (%d)\n", attr->max_wr, srq->limit); goto err1; } @@ -122,13 +122,13 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq, if (mask & IB_SRQ_LIMIT) { if (attr->srq_limit > rxe->attr.max_srq_wr) { - pr_warn("srq_limit(%d) > max_srq_wr(%d)\n", + rxe_dbg_srq(srq, "srq_limit(%d) > max_srq_wr(%d)\n", attr->srq_limit, rxe->attr.max_srq_wr); goto err1; } if (attr->srq_limit > srq->rq.queue->buf->index_mask) { - pr_warn("srq_limit (%d) > cur limit(%d)\n", + rxe_dbg_srq(srq, "srq_limit (%d) > cur limit(%d)\n", attr->srq_limit, srq->rq.queue->buf->index_mask); goto err1; diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c index ec2b7de1c4972105e1552b2bf3c8190869ce86a8..60b90e33a88496a4bbaf2e97dd0563c862ffd081 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.c +++ b/drivers/infiniband/sw/rxe/rxe_task.c @@ -4,10 +4,6 @@ * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. */ -#include -#include -#include - #include "rxe.h" int __rxe_do_task(struct rxe_task *task) @@ -28,30 +24,31 @@ int __rxe_do_task(struct rxe_task *task) * a second caller finds the task already running * but looks just after the last call to func */ -void rxe_do_task(struct tasklet_struct *t) +static void do_task(struct tasklet_struct *t) { int cont; int ret; struct rxe_task *task = from_tasklet(task, t, tasklet); + struct rxe_qp *qp = (struct rxe_qp *)task->arg; unsigned int iterations = RXE_MAX_ITERATIONS; - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); switch (task->state) { case TASK_STATE_START: task->state = TASK_STATE_BUSY; - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); break; case TASK_STATE_BUSY: task->state = TASK_STATE_ARMED; fallthrough; case TASK_STATE_ARMED: - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); return; default: - spin_unlock_bh(&task->state_lock); - pr_warn("%s failed with bad state %d\n", __func__, task->state); + spin_unlock_bh(&task->lock); + rxe_dbg_qp(qp, "failed with bad state %d\n", task->state); return; } @@ -59,7 +56,7 @@ void rxe_do_task(struct tasklet_struct *t) cont = 0; ret = task->func(task->arg); - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); switch (task->state) { case TASK_STATE_BUSY: if (ret) { @@ -85,27 +82,25 @@ void rxe_do_task(struct tasklet_struct *t) break; default: - pr_warn("%s failed with bad state %d\n", __func__, - task->state); + rxe_dbg_qp(qp, "failed with bad state %d\n", + task->state); } - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); } while (cont); task->ret = ret; } -int rxe_init_task(struct rxe_task *task, - void *arg, int (*func)(void *), char *name) +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)) { task->arg = arg; task->func = func; - snprintf(task->name, sizeof(task->name), "%s", name); task->destroyed = false; - tasklet_setup(&task->tasklet, rxe_do_task); + tasklet_setup(&task->tasklet, do_task); task->state = TASK_STATE_START; - spin_lock_init(&task->state_lock); + spin_lock_init(&task->lock); return 0; } @@ -121,23 +116,28 @@ void rxe_cleanup_task(struct rxe_task *task) task->destroyed = true; do { - spin_lock_bh(&task->state_lock); + spin_lock_bh(&task->lock); idle = (task->state == TASK_STATE_START); - spin_unlock_bh(&task->state_lock); + spin_unlock_bh(&task->lock); } while (!idle); tasklet_kill(&task->tasklet); } -void rxe_run_task(struct rxe_task *task, int sched) +void rxe_run_task(struct rxe_task *task) +{ + if (task->destroyed) + return; + + do_task(&task->tasklet); +} + +void rxe_sched_task(struct rxe_task *task) { if (task->destroyed) return; - if (sched) - tasklet_schedule(&task->tasklet); - else - rxe_do_task(&task->tasklet); + tasklet_schedule(&task->tasklet); } void rxe_disable_task(struct rxe_task *task) diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h index 7f612a1c68a7ba3c76d02aaa1fd5ee04c384a2bd..7b88129702ac6dae833368ec3f7c7c8413ebb943 100644 --- a/drivers/infiniband/sw/rxe/rxe_task.h +++ b/drivers/infiniband/sw/rxe/rxe_task.h @@ -21,11 +21,10 @@ enum { struct rxe_task { struct tasklet_struct tasklet; int state; - spinlock_t state_lock; /* spinlock for task state */ + spinlock_t lock; void *arg; int (*func)(void *arg); int ret; - char name[16]; bool destroyed; }; @@ -34,8 +33,7 @@ struct rxe_task { * arg => parameter to pass to fcn * func => function to call until it returns != 0 */ -int rxe_init_task(struct rxe_task *task, - void *arg, int (*func)(void *), char *name); +int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *)); /* cleanup task */ void rxe_cleanup_task(struct rxe_task *task); @@ -46,18 +44,9 @@ void rxe_cleanup_task(struct rxe_task *task); */ int __rxe_do_task(struct rxe_task *task); -/* - * common function called by any of the main tasklets - * If there is any chance that there is additional - * work to do someone must reschedule the task before - * leaving - */ -void rxe_do_task(struct tasklet_struct *t); +void rxe_run_task(struct rxe_task *task); -/* run a task, else schedule it to run as a tasklet, The decision - * to run or schedule tasklet is based on the parameter sched. - */ -void rxe_run_task(struct rxe_task *task, int sched); +void rxe_sched_task(struct rxe_task *task); /* keep a task from scheduling */ void rxe_disable_task(struct rxe_task *task); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 88825edc7dce19c0ac4a2806586fe68202e7d4aa..025b35bf014e2a24f292ac838a2ac66e7087b17a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -172,10 +172,6 @@ static int rxe_create_ah(struct ib_ah *ibah, ah->is_user = false; } - err = rxe_av_chk_attr(rxe, init_attr->ah_attr); - if (err) - return err; - err = rxe_add_to_pool_ah(&rxe->ah_pool, ah, init_attr->flags & RDMA_CREATE_AH_SLEEPABLE); if (err) @@ -184,6 +180,12 @@ static int rxe_create_ah(struct ib_ah *ibah, /* create index > 0 */ ah->ah_num = ah->elem.index; + err = rxe_ah_chk_attr(ah, init_attr->ah_attr); + if (err) { + rxe_cleanup(ah); + return err; + } + if (uresp) { /* only if new user provider */ err = copy_to_user(&uresp->ah_num, &ah->ah_num, @@ -206,10 +208,9 @@ static int rxe_create_ah(struct ib_ah *ibah, static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) { int err; - struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); - err = rxe_av_chk_attr(rxe, attr); + err = rxe_ah_chk_attr(ah, attr); if (err) return err; @@ -238,7 +239,6 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags) static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) { - int err; int i; u32 length; struct rxe_recv_wqe *recv_wqe; @@ -246,15 +246,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) int full; full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER); - if (unlikely(full)) { - err = -ENOMEM; - goto err1; - } + if (unlikely(full)) + return -ENOMEM; - if (unlikely(num_sge > rq->max_sge)) { - err = -EINVAL; - goto err1; - } + if (unlikely(num_sge > rq->max_sge)) + return -EINVAL; length = 0; for (i = 0; i < num_sge; i++) @@ -275,9 +271,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER); return 0; - -err1: - return err; } static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, @@ -343,10 +336,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, if (err) return err; - err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); - if (err) - return err; - return 0; + return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) @@ -453,11 +443,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) - goto err1; + return err; err = rxe_qp_from_attr(qp, attr, mask, udata); if (err) - goto err1; + return err; if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH)) qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label, @@ -465,9 +455,6 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, qp->attr.dest_qp_num); return 0; - -err1: - return err; } static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, @@ -501,24 +488,21 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr, struct rxe_sq *sq = &qp->sq; if (unlikely(num_sge > sq->max_sge)) - goto err1; + return -EINVAL; if (unlikely(mask & WR_ATOMIC_MASK)) { if (length < 8) - goto err1; + return -EINVAL; if (atomic_wr(ibwr)->remote_addr & 0x7) - goto err1; + return -EINVAL; } if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && (length > sq->max_inline))) - goto err1; + return -EINVAL; return 0; - -err1: - return -EINVAL; } static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, @@ -695,9 +679,9 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, wr = next; } - rxe_run_task(&qp->req.task, 1); + rxe_sched_task(&qp->req.task); if (unlikely(qp->req.state == QP_STATE_ERROR)) - rxe_run_task(&qp->comp.task, 1); + rxe_sched_task(&qp->comp.task); return err; } @@ -719,7 +703,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, if (qp->is_user) { /* Utilize process context to do protocol processing */ - rxe_run_task(&qp->req.task, 0); + rxe_run_task(&qp->req.task); return 0; } else return rxe_post_send_kernel(qp, wr, bad_wr); @@ -735,14 +719,12 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { *bad_wr = wr; - err = -EINVAL; - goto err1; + return -EINVAL; } if (unlikely(qp->srq)) { *bad_wr = wr; - err = -EINVAL; - goto err1; + return -EINVAL; } spin_lock_irqsave(&rq->producer_lock, flags); @@ -759,9 +741,8 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, spin_unlock_irqrestore(&rq->producer_lock, flags); if (qp->resp.state == QP_STATE_ERROR) - rxe_run_task(&qp->resp.task, 1); + rxe_sched_task(&qp->resp.task); -err1: return err; } @@ -826,16 +807,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) err = rxe_cq_chk_attr(rxe, cq, cqe, 0); if (err) - goto err1; - - err = rxe_cq_resize_queue(cq, cqe, uresp, udata); - if (err) - goto err1; - - return 0; + return err; -err1: - return err; + return rxe_cq_resize_queue(cq, cqe, uresp, udata); } static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) @@ -902,6 +876,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; rxe_mr_init_dma(access, mr); rxe_finalize(mr); @@ -921,26 +896,23 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, struct rxe_mr *mr; mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err2; - } - + if (!mr) + return ERR_PTR(-ENOMEM); rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; err = rxe_mr_init_user(rxe, start, length, iova, access, mr); if (err) - goto err3; + goto err1; rxe_finalize(mr); return &mr->ibmr; -err3: +err1: rxe_cleanup(mr); -err2: return ERR_PTR(err); } @@ -956,25 +928,23 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, return ERR_PTR(-EINVAL); mr = rxe_alloc(&rxe->mr_pool); - if (!mr) { - err = -ENOMEM; - goto err1; - } + if (!mr) + return ERR_PTR(-ENOMEM); rxe_get(pd); mr->ibmr.pd = ibpd; + mr->ibmr.device = ibpd->device; err = rxe_mr_init_fast(max_num_sg, mr); if (err) - goto err2; + goto err1; rxe_finalize(mr); return &mr->ibmr; -err2: - rxe_cleanup(mr); err1: + rxe_cleanup(mr); return ERR_PTR(err); } @@ -1134,7 +1104,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) err = ib_register_device(dev, ibdev_name, NULL); if (err) - pr_warn("%s failed with error %d\n", __func__, err); + rxe_dbg(rxe, "failed with error %d\n", err); /* * Note that rxe may be invalid at this point if another thread diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h index 5f5cbfcb35695b930168df9f9f96c3ff2122bef9..19ddfa89048035d64ca551b711961aa4529da36a 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.h +++ b/drivers/infiniband/sw/rxe/rxe_verbs.h @@ -165,6 +165,12 @@ struct resp_res { u64 va; u32 resid; } read; + struct { + u32 length; + u64 va; + u8 type; + u8 level; + } flush; }; }; @@ -304,7 +310,6 @@ struct rxe_mr { u32 lkey; u32 rkey; enum rxe_mr_state state; - enum ib_mr_type type; u32 offset; int access; diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c index d68e37859e73bf37f0fd64a8a8e120479d93418b..403029de6b92d45050191c31c3feef64a9bd68e1 100644 --- a/drivers/infiniband/sw/siw/siw_cq.c +++ b/drivers/infiniband/sw/siw/siw_cq.c @@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) { memset(wc, 0, sizeof(*wc)); wc->wr_id = cqe->id; - wc->status = map_cqe_status[cqe->status].ib; - wc->opcode = map_wc_opcode[cqe->opcode]; wc->byte_len = cqe->bytes; /* @@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc) wc->wc_flags = IB_WC_WITH_INVALIDATE; } wc->qp = cqe->base_qp; + wc->opcode = map_wc_opcode[cqe->opcode]; + wc->status = map_cqe_status[cqe->status].ib; siw_dbg_cq(cq, "idx %u, type %d, flags %2x, id 0x%pK\n", cq->cq_get % cq->num_cqe, cqe->opcode, cqe->flags, (void *)(uintptr_t)cqe->id); + } else { + /* + * A malicious user may set invalid opcode or + * status in the user mmapped CQE array. + * Sanity check and correct values in that case + * to avoid out-of-bounds access to global arrays + * for opcode and status mapping. + */ + u8 opcode = cqe->opcode; + u16 status = cqe->status; + + if (opcode >= SIW_NUM_OPCODES) { + opcode = 0; + status = SIW_WC_GENERAL_ERR; + } else if (status >= SIW_NUM_WC_STATUS) { + status = SIW_WC_GENERAL_ERR; + } + wc->opcode = map_wc_opcode[opcode]; + wc->status = map_cqe_status[status].ib; + } WRITE_ONCE(cqe->flags, 0); cq->cq_get++; diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c index 3e814cfb298cf8342734fa6917eca138b2ee38b4..906fde1a2a0de2d9ebea9f2e340aacbe2b816fd5 100644 --- a/drivers/infiniband/sw/siw/siw_verbs.c +++ b/drivers/infiniband/sw/siw/siw_verbs.c @@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr, static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { - struct siw_sqe sqe = {}; int rv = 0; while (wr) { - sqe.id = wr->wr_id; - sqe.opcode = wr->opcode; - rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR); + struct siw_sqe sqe = {}; + + switch (wr->opcode) { + case IB_WR_RDMA_WRITE: + sqe.opcode = SIW_OP_WRITE; + break; + case IB_WR_RDMA_READ: + sqe.opcode = SIW_OP_READ; + break; + case IB_WR_RDMA_READ_WITH_INV: + sqe.opcode = SIW_OP_READ_LOCAL_INV; + break; + case IB_WR_SEND: + sqe.opcode = SIW_OP_SEND; + break; + case IB_WR_SEND_WITH_IMM: + sqe.opcode = SIW_OP_SEND_WITH_IMM; + break; + case IB_WR_SEND_WITH_INV: + sqe.opcode = SIW_OP_SEND_REMOTE_INV; + break; + case IB_WR_LOCAL_INV: + sqe.opcode = SIW_OP_INVAL_STAG; + break; + case IB_WR_REG_MR: + sqe.opcode = SIW_OP_REG_MR; + break; + default: + rv = -EINVAL; + break; + } + if (!rv) { + sqe.id = wr->wr_id; + rv = siw_sqe_complete(qp, &sqe, 0, + SIW_WC_WR_FLUSH_ERR); + } if (rv) { if (bad_wr) *bad_wr = wr; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c index ea16ba5d8da6c8c0fc31dc4bd2c64cf83fe66f71..9ad8d985627524118150ddcdbcd583d4d576257e 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c @@ -41,6 +41,11 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = { [IFLA_IPOIB_UMCAST] = { .type = NLA_U16 }, }; +static unsigned int ipoib_get_max_num_queues(void) +{ + return min_t(unsigned int, num_possible_cpus(), 128); +} + static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev) { struct ipoib_dev_priv *priv = ipoib_priv(dev); @@ -172,6 +177,8 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = { .changelink = ipoib_changelink, .get_size = ipoib_get_size, .fill_info = ipoib_fill_info, + .get_num_rx_queues = ipoib_get_max_num_queues, + .get_num_tx_queues = ipoib_get_max_num_queues, }; struct rtnl_link_ops *ipoib_get_link_ops(void) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index a00ca117303a98cc8ca00c5e7e5aa933c617d64d..1b8eda0dae4e0cae4bb97d48ec0893f65584ca63 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -347,22 +347,6 @@ static void iser_device_try_release(struct iser_device *device) mutex_unlock(&ig.device_list_mutex); } -/* - * Called with state mutex held - */ -static int iser_conn_state_comp_exch(struct iser_conn *iser_conn, - enum iser_conn_state comp, - enum iser_conn_state exch) -{ - int ret; - - ret = (iser_conn->state == comp); - if (ret) - iser_conn->state = exch; - - return ret; -} - void iser_release_work(struct work_struct *work) { struct iser_conn *iser_conn; @@ -464,11 +448,13 @@ int iser_conn_terminate(struct iser_conn *iser_conn) struct ib_conn *ib_conn = &iser_conn->ib_conn; int err = 0; + lockdep_assert_held(&iser_conn->state_mutex); + /* terminate the iser conn only if the conn state is UP */ - if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP, - ISER_CONN_TERMINATING)) + if (iser_conn->state != ISER_CONN_UP) return 0; + iser_conn->state = ISER_CONN_TERMINATING; iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state); /* suspend queuing of new iscsi commands */ @@ -498,9 +484,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn) */ static void iser_connect_error(struct rdma_cm_id *cma_id) { - struct iser_conn *iser_conn; + struct iser_conn *iser_conn = cma_id->context; + + lockdep_assert_held(&iser_conn->state_mutex); - iser_conn = cma_id->context; iser_conn->state = ISER_CONN_TERMINATING; } @@ -542,12 +529,13 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn, */ static void iser_addr_handler(struct rdma_cm_id *cma_id) { + struct iser_conn *iser_conn = cma_id->context; struct iser_device *device; - struct iser_conn *iser_conn; struct ib_conn *ib_conn; int ret; - iser_conn = cma_id->context; + lockdep_assert_held(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -597,6 +585,8 @@ static void iser_route_handler(struct rdma_cm_id *cma_id) struct ib_conn *ib_conn = &iser_conn->ib_conn; struct ib_device *ib_dev = ib_conn->device->ib_device; + lockdep_assert_held(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -629,14 +619,18 @@ failure: iser_connect_error(cma_id); } +/* + * Called with state mutex held + */ static void iser_connected_handler(struct rdma_cm_id *cma_id, const void *private_data) { - struct iser_conn *iser_conn; + struct iser_conn *iser_conn = cma_id->context; struct ib_qp_attr attr; struct ib_qp_init_attr init_attr; - iser_conn = cma_id->context; + lockdep_assert_held(&iser_conn->state_mutex); + if (iser_conn->state != ISER_CONN_PENDING) /* bailout */ return; @@ -657,30 +651,27 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id, complete(&iser_conn->up_completion); } -static void iser_disconnected_handler(struct rdma_cm_id *cma_id) -{ - struct iser_conn *iser_conn = cma_id->context; - - if (iser_conn_terminate(iser_conn)) { - if (iser_conn->iscsi_conn) - iscsi_conn_failure(iser_conn->iscsi_conn, - ISCSI_ERR_CONN_FAILED); - else - iser_err("iscsi_iser connection isn't bound\n"); - } -} - +/* + * Called with state mutex held + */ static void iser_cleanup_handler(struct rdma_cm_id *cma_id, bool destroy) { struct iser_conn *iser_conn = cma_id->context; + lockdep_assert_held(&iser_conn->state_mutex); /* * We are not guaranteed that we visited disconnected_handler * by now, call it here to be safe that we handle CM drep * and flush errors. */ - iser_disconnected_handler(cma_id); + if (iser_conn_terminate(iser_conn)) { + if (iser_conn->iscsi_conn) + iscsi_conn_failure(iser_conn->iscsi_conn, + ISCSI_ERR_CONN_FAILED); + else + iser_err("iscsi_iser connection isn't bound\n"); + } iser_free_ib_conn_res(iser_conn, destroy); complete(&iser_conn->ib_completion); } diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index b360a1527cd1028679da02e801c3b44f4d34429c..75404885cf98102ae6c15fe6020c71107a1e1531 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -993,9 +993,8 @@ isert_rx_login_req(struct isert_conn *isert_conn) * login request PDU. */ login->leading_connection = (!login_req->tsih) ? 1 : 0; - login->current_stage = - (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) - >> 2; + login->current_stage = ISCSI_LOGIN_CURRENT_STAGE( + login_req->flags); login->version_min = login_req->min_version; login->version_max = login_req->max_version; memcpy(login->isid, login_req->isid, 6); diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c index 205fd44a4727a817695e62ab9aa5e1190443ffff..80abf45a197ac079f6304da86d9ca5b5c8d41abb 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c @@ -1064,10 +1064,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count) /* Align the MR to a 4K page size to match the block virt boundary */ nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K); - if (nr < 0) - return nr; - if (nr < req->sg_cnt) - return -EINVAL; + if (nr != count) + return nr < 0 ? nr : -EINVAL; ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); return nr; diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h index a2420eecaf5a1080c8e9f5f49ba9b70123a5b806..ab25619261d28b98d90488822fae4971bf0a1a55 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h +++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h @@ -68,10 +68,7 @@ enum { struct rtrs_ib_dev; struct rtrs_rdma_dev_pd_ops { - struct rtrs_ib_dev *(*alloc)(void); - void (*free)(struct rtrs_ib_dev *dev); int (*init)(struct rtrs_ib_dev *dev); - void (*deinit)(struct rtrs_ib_dev *dev); }; struct rtrs_rdma_dev_pd { diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c index 2a3c9ac64a42e289c9e71dde10f03d4e5756a47f..c76ba29da1e206297fc6017c9984489f1b364dc2 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c @@ -203,7 +203,6 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path) mutex_lock(&srv->paths_mutex); if (!--srv->dev_ref) { - kobject_del(srv->kobj_paths); kobject_put(srv->kobj_paths); mutex_unlock(&srv->paths_mutex); device_del(&srv->dev); @@ -304,12 +303,18 @@ destroy_root: void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path) { - if (srv_path->kobj.state_in_sysfs) { + if (srv_path->stats->kobj_stats.state_in_sysfs) { + sysfs_remove_group(&srv_path->stats->kobj_stats, + &rtrs_srv_stats_attr_group); kobject_del(&srv_path->stats->kobj_stats); kobject_put(&srv_path->stats->kobj_stats); + } + + if (srv_path->kobj.state_in_sysfs) { sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group); + kobject_del(&srv_path->kobj); kobject_put(&srv_path->kobj); - - rtrs_srv_destroy_once_sysfs_root_folders(srv_path); } + + rtrs_srv_destroy_once_sysfs_root_folders(srv_path); } diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c index 22d7ba05e9fe838b1fab5bde3e43b037d9b9bf0a..d1703e2c0b82fcc746154132fffafa5cbc61a77a 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c +++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c @@ -561,9 +561,11 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) { struct rtrs_srv_sess *srv = srv_path->srv; struct rtrs_path *ss = &srv_path->s; - int i, mri, err, mrs_num; + int i, err, mrs_num; unsigned int chunk_bits; int chunks_per_mr = 1; + struct ib_mr *mr; + struct sg_table *sgt; /* * Here we map queue_depth chunks to MR. Firstly we have to @@ -586,16 +588,14 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) if (!srv_path->mrs) return -ENOMEM; - srv_path->mrs_num = mrs_num; - - for (mri = 0; mri < mrs_num; mri++) { - struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri]; - struct sg_table *sgt = &srv_mr->sgt; + for (srv_path->mrs_num = 0; srv_path->mrs_num < mrs_num; + srv_path->mrs_num++) { + struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num]; struct scatterlist *s; - struct ib_mr *mr; int nr, nr_sgt, chunks; - chunks = chunks_per_mr * mri; + sgt = &srv_mr->sgt; + chunks = chunks_per_mr * srv_path->mrs_num; if (!always_invalidate) chunks_per_mr = min_t(int, chunks_per_mr, srv->queue_depth - chunks); @@ -622,7 +622,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) } nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt, NULL, max_chunk_size); - if (nr < 0 || nr < sgt->nents) { + if (nr != nr_sgt) { err = nr < 0 ? nr : -EINVAL; goto dereg_mr; } @@ -644,31 +644,24 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path) ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); srv_mr->mr = mr; - - continue; -err: - while (mri--) { - srv_mr = &srv_path->mrs[mri]; - sgt = &srv_mr->sgt; - mr = srv_mr->mr; - rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1); -dereg_mr: - ib_dereg_mr(mr); -unmap_sg: - ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl, - sgt->nents, DMA_BIDIRECTIONAL); -free_sg: - sg_free_table(sgt); - } - kfree(srv_path->mrs); - - return err; } chunk_bits = ilog2(srv->queue_depth - 1) + 1; srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); return 0; + +dereg_mr: + ib_dereg_mr(mr); +unmap_sg: + ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl, + sgt->nents, DMA_BIDIRECTIONAL); +free_sg: + sg_free_table(sgt); +err: + unmap_cont_bufs(srv_path); + + return err; } static void rtrs_srv_hb_err_handler(struct rtrs_con *c) @@ -1678,12 +1671,6 @@ static int create_con(struct rtrs_srv_path *srv_path, srv->queue_depth * (1 + 2) + 1); max_recv_wr = srv->queue_depth + 1; - /* - * If we have all receive requests posted and - * all write requests posted and each read request - * requires an invalidate request + drain - * and qp gets into error state. - */ } cq_num = max_send_wr + max_recv_wr; atomic_set(&con->c.sq_wr_avail, max_send_wr); @@ -1950,22 +1937,21 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, { struct rtrs_srv_path *srv_path = NULL; struct rtrs_path *s = NULL; + struct rtrs_con *c = NULL; - if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) { - struct rtrs_con *c = cm_id->context; - - s = c->path; - srv_path = to_srv_path(s); - } - - switch (ev->event) { - case RDMA_CM_EVENT_CONNECT_REQUEST: + if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST) /* * In case of error cma.c will destroy cm_id, * see cma_process_remove() */ return rtrs_rdma_connect(cm_id, ev->param.conn.private_data, ev->param.conn.private_data_len); + + c = cm_id->context; + s = c->path; + srv_path = to_srv_path(s); + + switch (ev->event) { case RDMA_CM_EVENT_ESTABLISHED: /* Nothing here */ break; diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c index ed324b47d93ae48017c34a80ab70d0b609adfbc7..4bf9d868cc522bd8b7631f0f28aba9199385800c 100644 --- a/drivers/infiniband/ulp/rtrs/rtrs.c +++ b/drivers/infiniband/ulp/rtrs/rtrs.c @@ -557,7 +557,6 @@ EXPORT_SYMBOL(rtrs_addr_to_sockaddr); void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags, struct rtrs_rdma_dev_pd *pool) { - WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free)); INIT_LIST_HEAD(&pool->list); mutex_init(&pool->mutex); pool->pd_flags = pd_flags; @@ -583,15 +582,8 @@ static void dev_free(struct kref *ref) list_del(&dev->entry); mutex_unlock(&pool->mutex); - if (pool->ops && pool->ops->deinit) - pool->ops->deinit(dev); - ib_dealloc_pd(dev->ib_pd); - - if (pool->ops && pool->ops->free) - pool->ops->free(dev); - else - kfree(dev); + kfree(dev); } int rtrs_ib_dev_put(struct rtrs_ib_dev *dev) @@ -618,11 +610,8 @@ rtrs_ib_dev_find_or_add(struct ib_device *ib_dev, goto out_unlock; } mutex_unlock(&pool->mutex); - if (pool->ops && pool->ops->alloc) - dev = pool->ops->alloc(); - else - dev = kzalloc(sizeof(*dev), GFP_KERNEL); - if (IS_ERR_OR_NULL(dev)) + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) goto out_err; kref_init(&dev->ref); @@ -644,10 +633,7 @@ out_unlock: out_free_pd: ib_dealloc_pd(dev->ib_pd); out_free_dev: - if (pool->ops && pool->ops->free) - pool->ops->free(dev); - else - kfree(dev); + kfree(dev); out_err: return NULL; } diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 1075c2ac8fe209deb703a97f38e5593791cfefe9..b4d6a4a5ae81e6e33e6b87b0a250c923c8b00b9e 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3410,7 +3410,8 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_PKEY: - if (match_hex(args, &token)) { + ret = match_hex(args, &token); + if (ret) { pr_warn("bad P_Key parameter '%s'\n", p); goto out; } @@ -3470,7 +3471,8 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_MAX_SECT: - if (match_int(args, &token)) { + ret = match_int(args, &token); + if (ret) { pr_warn("bad max sect parameter '%s'\n", p); goto out; } @@ -3478,8 +3480,15 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_QUEUE_SIZE: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { pr_warn("bad queue_size parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->scsi_host->can_queue = token; @@ -3490,25 +3499,40 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_MAX_CMD_PER_LUN: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { pr_warn("bad max cmd_per_lun parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->scsi_host->cmd_per_lun = token; break; case SRP_OPT_TARGET_CAN_QUEUE: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { pr_warn("bad max target_can_queue parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->target_can_queue = token; break; case SRP_OPT_IO_CLASS: - if (match_hex(args, &token)) { + ret = match_hex(args, &token); + if (ret) { pr_warn("bad IO class parameter '%s'\n", p); goto out; } @@ -3517,6 +3541,7 @@ static int srp_parse_options(struct net *net, const char *buf, pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n", token, SRP_REV10_IB_IO_CLASS, SRP_REV16A_IB_IO_CLASS); + ret = -EINVAL; goto out; } target->io_class = token; @@ -3539,16 +3564,24 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_CMD_SG_ENTRIES: - if (match_int(args, &token) || token < 1 || token > 255) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1 || token > 255) { pr_warn("bad max cmd_sg_entries parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->cmd_sg_cnt = token; break; case SRP_OPT_ALLOW_EXT_SG: - if (match_int(args, &token)) { + ret = match_int(args, &token); + if (ret) { pr_warn("bad allow_ext_sg parameter '%s'\n", p); goto out; } @@ -3556,43 +3589,77 @@ static int srp_parse_options(struct net *net, const char *buf, break; case SRP_OPT_SG_TABLESIZE: - if (match_int(args, &token) || token < 1 || - token > SG_MAX_SEGMENTS) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1 || token > SG_MAX_SEGMENTS) { pr_warn("bad max sg_tablesize parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->sg_tablesize = token; break; case SRP_OPT_COMP_VECTOR: - if (match_int(args, &token) || token < 0) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 0) { pr_warn("bad comp_vector parameter '%s'\n", p); + ret = -EINVAL; goto out; } target->comp_vector = token; break; case SRP_OPT_TL_RETRY_COUNT: - if (match_int(args, &token) || token < 2 || token > 7) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 2 || token > 7) { pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n", p); + ret = -EINVAL; goto out; } target->tl_retry_count = token; break; case SRP_OPT_MAX_IT_IU_SIZE: - if (match_int(args, &token) || token < 0) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 0) { pr_warn("bad maximum initiator to target IU size '%s'\n", p); + ret = -EINVAL; goto out; } target->max_it_iu_size = token; break; case SRP_OPT_CH_COUNT: - if (match_int(args, &token) || token < 1) { + ret = match_int(args, &token); + if (ret) { + pr_warn("match_int() failed for channel count parameter '%s', Error %d\n", + p, ret); + goto out; + } + if (token < 1) { pr_warn("bad channel count %s\n", p); + ret = -EINVAL; goto out; } target->ch_count = token; @@ -3601,6 +3668,7 @@ static int srp_parse_options(struct net *net, const char *buf, default: pr_warn("unknown parameter or missing value '%s' in target creation request\n", p); + ret = -EINVAL; goto out; } } diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 453da65262af40769131a8f801f5a56f2a4d94b5..79707685d54a4f8f038f04d6d267d27c199e7ab9 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -188,6 +188,7 @@ config MSM_IOMMU source "drivers/iommu/amd/Kconfig" source "drivers/iommu/intel/Kconfig" +source "drivers/iommu/iommufd/Kconfig" config IRQ_REMAP bool "Support for Interrupt Remapping" diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index cc9f381013c35fd1fcc44a9f280dad8cdf48730b..f461d065138564e4bacad8bd882a148742067cd3 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 -obj-y += amd/ intel/ arm/ +obj-y += amd/ intel/ arm/ iommufd/ obj-$(CONFIG_IOMMU_API) += iommu.o obj-$(CONFIG_IOMMU_API) += iommu-traces.o obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o @@ -28,6 +28,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o -obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o +obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o obj-$(CONFIG_APPLE_DART) += apple-dart.o diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 4d28967f910d96d40236c0d76f7e658d9146e2ca..8d37d9087fab285b566e398bcde0c2fdbf09cab1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2155,21 +2155,13 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, static int amd_iommu_attach_device(struct iommu_domain *dom, struct device *dev) { + struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); struct protection_domain *domain = to_pdomain(dom); - struct iommu_dev_data *dev_data; - struct amd_iommu *iommu; + struct amd_iommu *iommu = rlookup_amd_iommu(dev); int ret; - if (!check_device(dev)) - return -EINVAL; - - dev_data = dev_iommu_priv_get(dev); dev_data->defer_attach = false; - iommu = rlookup_amd_iommu(dev); - if (!iommu) - return -EINVAL; - if (dev_data->domain) detach_device(dev); @@ -2286,6 +2278,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) return false; case IOMMU_CAP_PRE_BOOT_PROTECTION: return amdr_ivrs_remap_support; + case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: + return true; default: break; } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 5968a568aae2afea4d8358c8ecfbb7c69dfaa296..a5a63b1c947eb1daaec72db4ba7c6ca6f5d5c6a4 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -10,7 +10,7 @@ #include #include "arm-smmu-v3.h" -#include "../../iommu-sva-lib.h" +#include "../../iommu-sva.h" #include "../../io-pgtable-arm.h" struct arm_smmu_mmu_notifier { @@ -344,11 +344,6 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) if (!bond) return ERR_PTR(-ENOMEM); - /* Allocate a PASID for this mm if necessary */ - ret = iommu_sva_alloc_pasid(mm, 1, (1U << master->ssid_bits) - 1); - if (ret) - goto err_free_bond; - bond->mm = mm; bond->sva.dev = dev; refcount_set(&bond->refs, 1); @@ -367,42 +362,6 @@ err_free_bond: return ERR_PTR(ret); } -struct iommu_sva * -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - struct iommu_sva *handle; - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); - - if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1) - return ERR_PTR(-EINVAL); - - mutex_lock(&sva_lock); - handle = __arm_smmu_sva_bind(dev, mm); - mutex_unlock(&sva_lock); - return handle; -} - -void arm_smmu_sva_unbind(struct iommu_sva *handle) -{ - struct arm_smmu_bond *bond = sva_to_bond(handle); - - mutex_lock(&sva_lock); - if (refcount_dec_and_test(&bond->refs)) { - list_del(&bond->list); - arm_smmu_mmu_notifier_put(bond->smmu_mn); - kfree(bond); - } - mutex_unlock(&sva_lock); -} - -u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle) -{ - struct arm_smmu_bond *bond = sva_to_bond(handle); - - return bond->mm->pasid; -} - bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { unsigned long reg, fld; @@ -550,3 +509,64 @@ void arm_smmu_sva_notifier_synchronize(void) */ mmu_notifier_synchronize(); } + +void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + struct mm_struct *mm = domain->mm; + struct arm_smmu_bond *bond = NULL, *t; + struct arm_smmu_master *master = dev_iommu_priv_get(dev); + + mutex_lock(&sva_lock); + list_for_each_entry(t, &master->bonds, list) { + if (t->mm == mm) { + bond = t; + break; + } + } + + if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) { + list_del(&bond->list); + arm_smmu_mmu_notifier_put(bond->smmu_mn); + kfree(bond); + } + mutex_unlock(&sva_lock); +} + +static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id) +{ + int ret = 0; + struct iommu_sva *handle; + struct mm_struct *mm = domain->mm; + + mutex_lock(&sva_lock); + handle = __arm_smmu_sva_bind(dev, mm); + if (IS_ERR(handle)) + ret = PTR_ERR(handle); + mutex_unlock(&sva_lock); + + return ret; +} + +static void arm_smmu_sva_domain_free(struct iommu_domain *domain) +{ + kfree(domain); +} + +static const struct iommu_domain_ops arm_smmu_sva_domain_ops = { + .set_dev_pasid = arm_smmu_sva_set_dev_pasid, + .free = arm_smmu_sva_domain_free +}; + +struct iommu_domain *arm_smmu_sva_domain_alloc(void) +{ + struct iommu_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + domain->ops = &arm_smmu_sva_domain_ops; + + return domain; +} diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 6d5df91c5c465a4314f6a8bc41ec969d3d84c910..ab160198edd6b1d861af9459632d7da020d18ca9 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -29,7 +29,7 @@ #include "arm-smmu-v3.h" #include "../../dma-iommu.h" -#include "../../iommu-sva-lib.h" +#include "../../iommu-sva.h" static bool disable_bypass = true; module_param(disable_bypass, bool, 0444); @@ -2009,6 +2009,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; + if (type == IOMMU_DOMAIN_SVA) + return arm_smmu_sva_domain_alloc(); + if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA && type != IOMMU_DOMAIN_DMA_FQ && @@ -2430,23 +2433,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out_unlock; } } else if (smmu_domain->smmu != smmu) { - dev_err(dev, - "cannot attach to SMMU %s (upstream of %s)\n", - dev_name(smmu_domain->smmu->dev), - dev_name(smmu->dev)); - ret = -ENXIO; + ret = -EINVAL; goto out_unlock; } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { - dev_err(dev, - "cannot attach to incompatible domain (%u SSID bits != %u)\n", - smmu_domain->s1_cfg.s1cdmax, master->ssid_bits); ret = -EINVAL; goto out_unlock; } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && smmu_domain->stall_enabled != master->stall_enabled) { - dev_err(dev, "cannot attach to stall-%s domain\n", - smmu_domain->stall_enabled ? "enabled" : "disabled"); ret = -EINVAL; goto out_unlock; } @@ -2838,6 +2832,17 @@ static int arm_smmu_def_domain_type(struct device *dev) return 0; } +static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + struct iommu_domain *domain; + + domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA); + if (WARN_ON(IS_ERR(domain)) || !domain) + return; + + arm_smmu_sva_remove_dev_pasid(domain, dev, pasid); +} + static struct iommu_ops arm_smmu_ops = { .capable = arm_smmu_capable, .domain_alloc = arm_smmu_domain_alloc, @@ -2846,11 +2851,9 @@ static struct iommu_ops arm_smmu_ops = { .device_group = arm_smmu_device_group, .of_xlate = arm_smmu_of_xlate, .get_resv_regions = arm_smmu_get_resv_regions, + .remove_dev_pasid = arm_smmu_remove_dev_pasid, .dev_enable_feat = arm_smmu_dev_enable_feature, .dev_disable_feat = arm_smmu_dev_disable_feature, - .sva_bind = arm_smmu_sva_bind, - .sva_unbind = arm_smmu_sva_unbind, - .sva_get_pasid = arm_smmu_sva_get_pasid, .page_response = arm_smmu_page_response, .def_domain_type = arm_smmu_def_domain_type, .pgsize_bitmap = -1UL, /* Restricted during device attach */ @@ -3543,6 +3546,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu) /* SID/SSID sizes */ smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg); smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg); + smmu->iommu.max_pasids = 1UL << smmu->ssid_bits; /* * If the SMMU supports fewer bits than would fill a single L2 stream diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index cd48590ada3039ad70eadbc1edaa8bf877ce74ff..8d772ea8a58367b690e91b2a7fbea251b11b93c2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -754,11 +754,10 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master); int arm_smmu_master_enable_sva(struct arm_smmu_master *master); int arm_smmu_master_disable_sva(struct arm_smmu_master *master); bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master); -struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); -void arm_smmu_sva_unbind(struct iommu_sva *handle); -u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle); void arm_smmu_sva_notifier_synchronize(void); +struct iommu_domain *arm_smmu_sva_domain_alloc(void); +void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t id); #else /* CONFIG_ARM_SMMU_V3_SVA */ static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) { @@ -790,19 +789,17 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master return false; } -static inline struct iommu_sva * -arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata) +static inline void arm_smmu_sva_notifier_synchronize(void) {} + +static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void) { - return ERR_PTR(-ENODEV); + return NULL; } -static inline void arm_smmu_sva_unbind(struct iommu_sva *handle) {} - -static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle) +static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, + struct device *dev, + ioasid_t id) { - return IOMMU_PASID_INVALID; } - -static inline void arm_smmu_sva_notifier_synchronize(void) {} #endif /* CONFIG_ARM_SMMU_V3_SVA */ #endif /* _ARM_SMMU_V3_H */ diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 30dab1418e3ff09c0bc62d5c5058d1f37a3a197c..719fbca1fe52a0b329ccac1c3f8f1001b11194c7 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1150,9 +1150,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) * different SMMUs. */ if (smmu_domain->smmu != smmu) { - dev_err(dev, - "cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n", - dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev)); ret = -EINVAL; goto rpm_put; } diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 3869c3ecda8cd1193cfdbeec0ac2d235ed3d04de..bfd7b51eb5dbfffab0565da800988b28e8df89bd 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -381,13 +381,8 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev * Sanity check the domain. We don't support domains across * different IOMMUs. */ - if (qcom_domain->iommu != qcom_iommu) { - dev_err(dev, "cannot attach to IOMMU %s while already " - "attached to domain on IOMMU %s\n", - dev_name(qcom_domain->iommu->dev), - dev_name(qcom_iommu->dev)); + if (qcom_domain->iommu != qcom_iommu) return -EINVAL; - } return 0; } diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c index 0d03f837a5d4e7ff820a223dd4a6349424a42647..2eb3211c8167792f4147271a4ab3eac69f776d4d 100644 --- a/drivers/iommu/fsl_pamu.c +++ b/drivers/iommu/fsl_pamu.c @@ -211,7 +211,7 @@ int pamu_config_ppaace(int liodn, u32 omi, u32 stashid, int prot) ppaace->op_encode.index_ot.omi = omi; } else if (~omi != 0) { pr_debug("bad operation mapping index: %d\n", omi); - return -EINVAL; + return -ENODEV; } /* configure stash id */ diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index fa20f4b03e12db026b41e8017f7859d4b9e227bd..4408ac3c49b610e13a3b7158b61f7e117a917394 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -258,7 +258,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, liodn = of_get_property(dev->of_node, "fsl,liodn", &len); if (!liodn) { pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node); - return -EINVAL; + return -ENODEV; } spin_lock_irqsave(&dma_domain->domain_lock, flags); @@ -267,7 +267,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain, if (liodn[i] >= PAACE_NUMBER_ENTRIES) { pr_debug("Invalid liodn %d, attach device failed for %pOF\n", liodn[i], dev->of_node); - ret = -EINVAL; + ret = -ENODEV; break; } diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index bc94059a5b870abbd28b8e92fe7116170eb53259..b00a0ceb2d13701841b06b44c946bd48e07e87a1 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -1105,6 +1105,13 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) raw_spin_lock_init(&iommu->register_lock); + /* + * A value of N in PSS field of eCap register indicates hardware + * supports PASID field of N+1 bits. + */ + if (pasid_supported(iommu)) + iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap); + /* * This is only for hotplug; at boot time intel_iommu_enabled won't * be set yet. When intel_iommu_init() runs, it registers the units diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 644ca49e8cf80a92ba344455c509115853058ad2..bef8e8f7ca258f7509104a85d2bf1071005890b0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -27,7 +27,7 @@ #include "iommu.h" #include "../dma-iommu.h" #include "../irq_remapping.h" -#include "../iommu-sva-lib.h" +#include "../iommu-sva.h" #include "pasid.h" #include "cap_audit.h" @@ -4188,6 +4188,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type) return domain; case IOMMU_DOMAIN_IDENTITY: return &si_domain->domain; + case IOMMU_DOMAIN_SVA: + return intel_svm_domain_alloc(); default: return NULL; } @@ -4213,19 +4215,15 @@ static int prepare_domain_attach_device(struct iommu_domain *domain, return -ENODEV; if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap)) - return -EOPNOTSUPP; + return -EINVAL; /* check if this iommu agaw is sufficient for max mapped address */ addr_width = agaw_to_width(iommu->agaw); if (addr_width > cap_mgaw(iommu->cap)) addr_width = cap_mgaw(iommu->cap); - if (dmar_domain->max_addr > (1LL << addr_width)) { - dev_err(dev, "%s: iommu width (%d) is not " - "sufficient for the mapped address (%llx)\n", - __func__, addr_width, dmar_domain->max_addr); - return -EFAULT; - } + if (dmar_domain->max_addr > (1LL << addr_width)) + return -EINVAL; dmar_domain->gaw = addr_width; /* @@ -4471,14 +4469,20 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) { - if (cap == IOMMU_CAP_CACHE_COHERENCY) + struct device_domain_info *info = dev_iommu_priv_get(dev); + + switch (cap) { + case IOMMU_CAP_CACHE_COHERENCY: return true; - if (cap == IOMMU_CAP_INTR_REMAP) + case IOMMU_CAP_INTR_REMAP: return irq_remapping_enabled == 1; - if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION) + case IOMMU_CAP_PRE_BOOT_PROTECTION: return dmar_platform_optin(); - - return false; + case IOMMU_CAP_ENFORCE_CACHE_COHERENCY: + return ecap_sc_support(info->iommu->ecap); + default: + return false; + } } static struct iommu_device *intel_iommu_probe_device(struct device *dev) @@ -4732,6 +4736,28 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); } +static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); + struct iommu_domain *domain; + + /* Domain type specific cleanup: */ + domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0); + if (domain) { + switch (domain->type) { + case IOMMU_DOMAIN_SVA: + intel_svm_remove_dev_pasid(dev, pasid); + break; + default: + /* should never reach here */ + WARN_ON(1); + break; + } + } + + intel_pasid_tear_down_entry(iommu, dev, pasid, false); +} + const struct iommu_ops intel_iommu_ops = { .capable = intel_iommu_capable, .domain_alloc = intel_iommu_domain_alloc, @@ -4744,11 +4770,9 @@ const struct iommu_ops intel_iommu_ops = { .dev_disable_feat = intel_iommu_dev_disable_feat, .is_attach_deferred = intel_iommu_is_attach_deferred, .def_domain_type = device_def_domain_type, + .remove_dev_pasid = intel_iommu_remove_dev_pasid, .pgsize_bitmap = SZ_4K, #ifdef CONFIG_INTEL_IOMMU_SVM - .sva_bind = intel_svm_bind, - .sva_unbind = intel_svm_unbind, - .sva_get_pasid = intel_svm_get_pasid, .page_response = intel_svm_page_response, #endif .default_domain_ops = &(const struct iommu_domain_ops) { diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index b3d82d7093e6b4aa458a2c1d85cc804d4807a335..f83ad8ddcf4d96dcb12fca9fde77daee6a4a0b89 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -480,8 +480,6 @@ enum { #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) #define VTD_FLAG_SVM_CAPABLE (1 << 2) -extern int intel_iommu_sm; - #define sm_supported(iommu) (intel_iommu_sm && ecap_smts((iommu)->ecap)) #define pasid_supported(iommu) (sm_supported(iommu) && \ ecap_pasid((iommu)->ecap)) @@ -753,12 +751,10 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn); extern void intel_svm_check(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, - void *drvdata); -void intel_svm_unbind(struct iommu_sva *handle); -u32 intel_svm_get_pasid(struct iommu_sva *handle); int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg); +struct iommu_domain *intel_svm_domain_alloc(void); +void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid); struct intel_svm_dev { struct list_head list; @@ -783,6 +779,14 @@ struct intel_svm { }; #else static inline void intel_svm_check(struct intel_iommu *iommu) {} +static inline struct iommu_domain *intel_svm_domain_alloc(void) +{ + return NULL; +} + +static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ +} #endif #ifdef CONFIG_INTEL_IOMMU_DEBUGFS @@ -798,6 +802,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, extern const struct iommu_ops intel_iommu_ops; #ifdef CONFIG_INTEL_IOMMU +extern int intel_iommu_sm; extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; @@ -813,6 +818,7 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu) } #define dmar_disabled (1) #define intel_iommu_enabled (0) +#define intel_iommu_sm (0) #endif static inline const char *decode_prq_descriptor(char *str, size_t size, diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index e13d7e5273e195cc0bc84019e650e4c7e7a5523c..fb3c7020028d07cc4e8b598f25b4b4f185c67376 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -101,8 +101,10 @@ int intel_pasid_alloc_table(struct device *dev) might_sleep(); info = dev_iommu_priv_get(dev); - if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table)) - return -EINVAL; + if (WARN_ON(!info || !dev_is_pci(dev))) + return -ENODEV; + if (WARN_ON(info->pasid_table)) + return -EEXIST; pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL); if (!pasid_table) diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 03b25358946c4fa00d101f69a8178effc61d7664..c76b66263467aefa12df8aee8ca1d09963933465 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -24,7 +24,7 @@ #include "iommu.h" #include "pasid.h" #include "perf.h" -#include "../iommu-sva-lib.h" +#include "../iommu-sva.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); @@ -299,19 +299,9 @@ out: return 0; } -static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm, - unsigned int flags) -{ - ioasid_t max_pasid = dev_is_pci(dev) ? - pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id; - - return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1); -} - static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, - struct mm_struct *mm, - unsigned int flags) + struct mm_struct *mm) { struct device_domain_info *info = dev_iommu_priv_get(dev); struct intel_svm_dev *sdev; @@ -327,22 +317,18 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, svm->pasid = mm->pasid; svm->mm = mm; - svm->flags = flags; INIT_LIST_HEAD_RCU(&svm->devs); - if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) { - svm->notifier.ops = &intel_mmuops; - ret = mmu_notifier_register(&svm->notifier, mm); - if (ret) { - kfree(svm); - return ERR_PTR(ret); - } + svm->notifier.ops = &intel_mmuops; + ret = mmu_notifier_register(&svm->notifier, mm); + if (ret) { + kfree(svm); + return ERR_PTR(ret); } ret = pasid_private_add(svm->pasid, svm); if (ret) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); + mmu_notifier_unregister(&svm->notifier, mm); kfree(svm); return ERR_PTR(ret); } @@ -377,9 +363,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu, } /* Setup the pasid table: */ - sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ? - PASID_FLAG_SUPERVISOR_MODE : 0; - sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; + sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, FLPT_DEFAULT_DID, sflags); if (ret) @@ -393,8 +377,7 @@ free_sdev: kfree(sdev); free_svm: if (list_empty(&svm->devs)) { - if (svm->notifier.ops) - mmu_notifier_unregister(&svm->notifier, mm); + mmu_notifier_unregister(&svm->notifier, mm); pasid_private_remove(mm->pasid); kfree(svm); } @@ -787,67 +770,6 @@ prq_advance: return IRQ_RETVAL(handled); } -struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL); - unsigned int flags = 0; - struct iommu_sva *sva; - int ret; - - if (drvdata) - flags = *(unsigned int *)drvdata; - - if (flags & SVM_FLAG_SUPERVISOR_MODE) { - if (!ecap_srs(iommu->ecap)) { - dev_err(dev, "%s: Supervisor PASID not supported\n", - iommu->name); - return ERR_PTR(-EOPNOTSUPP); - } - - if (mm) { - dev_err(dev, "%s: Supervisor PASID with user provided mm\n", - iommu->name); - return ERR_PTR(-EINVAL); - } - - mm = &init_mm; - } - - mutex_lock(&pasid_mutex); - ret = intel_svm_alloc_pasid(dev, mm, flags); - if (ret) { - mutex_unlock(&pasid_mutex); - return ERR_PTR(ret); - } - - sva = intel_svm_bind_mm(iommu, dev, mm, flags); - mutex_unlock(&pasid_mutex); - - return sva; -} - -void intel_svm_unbind(struct iommu_sva *sva) -{ - struct intel_svm_dev *sdev = to_intel_svm_dev(sva); - - mutex_lock(&pasid_mutex); - intel_svm_unbind_mm(sdev->dev, sdev->pasid); - mutex_unlock(&pasid_mutex); -} - -u32 intel_svm_get_pasid(struct iommu_sva *sva) -{ - struct intel_svm_dev *sdev; - u32 pasid; - - mutex_lock(&pasid_mutex); - sdev = to_intel_svm_dev(sva); - pasid = sdev->pasid; - mutex_unlock(&pasid_mutex); - - return pasid; -} - int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt, struct iommu_page_response *msg) @@ -918,3 +840,50 @@ int intel_svm_page_response(struct device *dev, out: return ret; } + +void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) +{ + mutex_lock(&pasid_mutex); + intel_svm_unbind_mm(dev, pasid); + mutex_unlock(&pasid_mutex); +} + +static int intel_svm_set_dev_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev); + struct intel_iommu *iommu = info->iommu; + struct mm_struct *mm = domain->mm; + struct iommu_sva *sva; + int ret = 0; + + mutex_lock(&pasid_mutex); + sva = intel_svm_bind_mm(iommu, dev, mm); + if (IS_ERR(sva)) + ret = PTR_ERR(sva); + mutex_unlock(&pasid_mutex); + + return ret; +} + +static void intel_svm_domain_free(struct iommu_domain *domain) +{ + kfree(to_dmar_domain(domain)); +} + +static const struct iommu_domain_ops intel_svm_domain_ops = { + .set_dev_pasid = intel_svm_set_dev_pasid, + .free = intel_svm_domain_free +}; + +struct iommu_domain *intel_svm_domain_alloc(void) +{ + struct dmar_domain *domain; + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + domain->domain.ops = &intel_svm_domain_ops; + + return &domain->domain; +} diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c index 1df8c1dcae7761feb78238ba8bfcfd78a6ed3beb..e5b8b9110c132aa88444e52fd75585aba1945bab 100644 --- a/drivers/iommu/io-pgfault.c +++ b/drivers/iommu/io-pgfault.c @@ -11,7 +11,7 @@ #include #include -#include "iommu-sva-lib.h" +#include "iommu-sva.h" /** * struct iopf_queue - IO Page Fault queue @@ -69,69 +69,18 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf, return iommu_page_response(dev, &resp); } -static enum iommu_page_response_code -iopf_handle_single(struct iopf_fault *iopf) -{ - vm_fault_t ret; - struct mm_struct *mm; - struct vm_area_struct *vma; - unsigned int access_flags = 0; - unsigned int fault_flags = FAULT_FLAG_REMOTE; - struct iommu_fault_page_request *prm = &iopf->fault.prm; - enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; - - if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) - return status; - - mm = iommu_sva_find(prm->pasid); - if (IS_ERR_OR_NULL(mm)) - return status; - - mmap_read_lock(mm); - - vma = find_extend_vma(mm, prm->addr); - if (!vma) - /* Unmapped area */ - goto out_put_mm; - - if (prm->perm & IOMMU_FAULT_PERM_READ) - access_flags |= VM_READ; - - if (prm->perm & IOMMU_FAULT_PERM_WRITE) { - access_flags |= VM_WRITE; - fault_flags |= FAULT_FLAG_WRITE; - } - - if (prm->perm & IOMMU_FAULT_PERM_EXEC) { - access_flags |= VM_EXEC; - fault_flags |= FAULT_FLAG_INSTRUCTION; - } - - if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) - fault_flags |= FAULT_FLAG_USER; - - if (access_flags & ~vma->vm_flags) - /* Access fault */ - goto out_put_mm; - - ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); - status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID : - IOMMU_PAGE_RESP_SUCCESS; - -out_put_mm: - mmap_read_unlock(mm); - mmput(mm); - - return status; -} - -static void iopf_handle_group(struct work_struct *work) +static void iopf_handler(struct work_struct *work) { struct iopf_group *group; + struct iommu_domain *domain; struct iopf_fault *iopf, *next; enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS; group = container_of(work, struct iopf_group, work); + domain = iommu_get_domain_for_dev_pasid(group->dev, + group->last_fault.fault.prm.pasid, 0); + if (!domain || !domain->iopf_handler) + status = IOMMU_PAGE_RESP_INVALID; list_for_each_entry_safe(iopf, next, &group->faults, list) { /* @@ -139,7 +88,8 @@ static void iopf_handle_group(struct work_struct *work) * faults in the group if there is an error. */ if (status == IOMMU_PAGE_RESP_SUCCESS) - status = iopf_handle_single(iopf); + status = domain->iopf_handler(&iopf->fault, + domain->fault_data); if (!(iopf->fault.prm.flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE)) @@ -181,6 +131,13 @@ static void iopf_handle_group(struct work_struct *work) * request completes, outstanding faults will have been dealt with by the time * the PASID is freed. * + * Any valid page fault will be eventually routed to an iommu domain and the + * page fault handler installed there will get called. The users of this + * handling framework should guarantee that the iommu domain could only be + * freed after the device has stopped generating page faults (or the iommu + * hardware has been set to block the page faults) and the pending page faults + * have been flushed. + * * Return: 0 on success and <0 on error. */ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) @@ -235,7 +192,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) group->last_fault.fault = *fault; INIT_LIST_HEAD(&group->faults); list_add(&group->last_fault.list, &group->faults); - INIT_WORK(&group->work, iopf_handle_group); + INIT_WORK(&group->work, iopf_handler); /* See if we have partial faults for this group */ list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) { diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c deleted file mode 100644 index 10650614389605b6442b471af36546fe8d29903f..0000000000000000000000000000000000000000 --- a/drivers/iommu/iommu-sva-lib.c +++ /dev/null @@ -1,71 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Helpers for IOMMU drivers implementing SVA - */ -#include -#include - -#include "iommu-sva-lib.h" - -static DEFINE_MUTEX(iommu_sva_lock); -static DECLARE_IOASID_SET(iommu_sva_pasid); - -/** - * iommu_sva_alloc_pasid - Allocate a PASID for the mm - * @mm: the mm - * @min: minimum PASID value (inclusive) - * @max: maximum PASID value (inclusive) - * - * Try to allocate a PASID for this mm, or take a reference to the existing one - * provided it fits within the [@min, @max] range. On success the PASID is - * available in mm->pasid and will be available for the lifetime of the mm. - * - * Returns 0 on success and < 0 on error. - */ -int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) -{ - int ret = 0; - ioasid_t pasid; - - if (min == INVALID_IOASID || max == INVALID_IOASID || - min == 0 || max < min) - return -EINVAL; - - mutex_lock(&iommu_sva_lock); - /* Is a PASID already associated with this mm? */ - if (pasid_valid(mm->pasid)) { - if (mm->pasid < min || mm->pasid >= max) - ret = -EOVERFLOW; - goto out; - } - - pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); - if (!pasid_valid(pasid)) - ret = -ENOMEM; - else - mm_pasid_set(mm, pasid); -out: - mutex_unlock(&iommu_sva_lock); - return ret; -} -EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); - -/* ioasid_find getter() requires a void * argument */ -static bool __mmget_not_zero(void *mm) -{ - return mmget_not_zero(mm); -} - -/** - * iommu_sva_find() - Find mm associated to the given PASID - * @pasid: Process Address Space ID assigned to the mm - * - * On success a reference to the mm is taken, and must be released with mmput(). - * - * Returns the mm corresponding to this PASID, or an error if not found. - */ -struct mm_struct *iommu_sva_find(ioasid_t pasid) -{ - return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero); -} -EXPORT_SYMBOL_GPL(iommu_sva_find); diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c new file mode 100644 index 0000000000000000000000000000000000000000..24bf9b2b58aa6d2c2b84f390290b1d9253994d8e --- /dev/null +++ b/drivers/iommu/iommu-sva.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Helpers for IOMMU drivers implementing SVA + */ +#include +#include +#include + +#include "iommu-sva.h" + +static DEFINE_MUTEX(iommu_sva_lock); +static DECLARE_IOASID_SET(iommu_sva_pasid); + +/** + * iommu_sva_alloc_pasid - Allocate a PASID for the mm + * @mm: the mm + * @min: minimum PASID value (inclusive) + * @max: maximum PASID value (inclusive) + * + * Try to allocate a PASID for this mm, or take a reference to the existing one + * provided it fits within the [@min, @max] range. On success the PASID is + * available in mm->pasid and will be available for the lifetime of the mm. + * + * Returns 0 on success and < 0 on error. + */ +int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) +{ + int ret = 0; + ioasid_t pasid; + + if (min == INVALID_IOASID || max == INVALID_IOASID || + min == 0 || max < min) + return -EINVAL; + + mutex_lock(&iommu_sva_lock); + /* Is a PASID already associated with this mm? */ + if (pasid_valid(mm->pasid)) { + if (mm->pasid < min || mm->pasid >= max) + ret = -EOVERFLOW; + goto out; + } + + pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); + if (!pasid_valid(pasid)) + ret = -ENOMEM; + else + mm_pasid_set(mm, pasid); +out: + mutex_unlock(&iommu_sva_lock); + return ret; +} +EXPORT_SYMBOL_GPL(iommu_sva_alloc_pasid); + +/* ioasid_find getter() requires a void * argument */ +static bool __mmget_not_zero(void *mm) +{ + return mmget_not_zero(mm); +} + +/** + * iommu_sva_find() - Find mm associated to the given PASID + * @pasid: Process Address Space ID assigned to the mm + * + * On success a reference to the mm is taken, and must be released with mmput(). + * + * Returns the mm corresponding to this PASID, or an error if not found. + */ +struct mm_struct *iommu_sva_find(ioasid_t pasid) +{ + return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero); +} +EXPORT_SYMBOL_GPL(iommu_sva_find); + +/** + * iommu_sva_bind_device() - Bind a process address space to a device + * @dev: the device + * @mm: the mm to bind, caller must hold a reference to mm_users + * + * Create a bond between device and address space, allowing the device to + * access the mm using the PASID returned by iommu_sva_get_pasid(). If a + * bond already exists between @device and @mm, an additional internal + * reference is taken. Caller must call iommu_sva_unbind_device() + * to release each reference. + * + * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to + * initialize the required SVA features. + * + * On error, returns an ERR_PTR value. + */ +struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm) +{ + struct iommu_domain *domain; + struct iommu_sva *handle; + ioasid_t max_pasids; + int ret; + + max_pasids = dev->iommu->max_pasids; + if (!max_pasids) + return ERR_PTR(-EOPNOTSUPP); + + /* Allocate mm->pasid if necessary. */ + ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1); + if (ret) + return ERR_PTR(ret); + + handle = kzalloc(sizeof(*handle), GFP_KERNEL); + if (!handle) + return ERR_PTR(-ENOMEM); + + mutex_lock(&iommu_sva_lock); + /* Search for an existing domain. */ + domain = iommu_get_domain_for_dev_pasid(dev, mm->pasid, + IOMMU_DOMAIN_SVA); + if (IS_ERR(domain)) { + ret = PTR_ERR(domain); + goto out_unlock; + } + + if (domain) { + domain->users++; + goto out; + } + + /* Allocate a new domain and set it on device pasid. */ + domain = iommu_sva_domain_alloc(dev, mm); + if (!domain) { + ret = -ENOMEM; + goto out_unlock; + } + + ret = iommu_attach_device_pasid(domain, dev, mm->pasid); + if (ret) + goto out_free_domain; + domain->users = 1; +out: + mutex_unlock(&iommu_sva_lock); + handle->dev = dev; + handle->domain = domain; + + return handle; + +out_free_domain: + iommu_domain_free(domain); +out_unlock: + mutex_unlock(&iommu_sva_lock); + kfree(handle); + + return ERR_PTR(ret); +} +EXPORT_SYMBOL_GPL(iommu_sva_bind_device); + +/** + * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device + * @handle: the handle returned by iommu_sva_bind_device() + * + * Put reference to a bond between device and address space. The device should + * not be issuing any more transaction for this PASID. All outstanding page + * requests for this PASID must have been flushed to the IOMMU. + */ +void iommu_sva_unbind_device(struct iommu_sva *handle) +{ + struct iommu_domain *domain = handle->domain; + ioasid_t pasid = domain->mm->pasid; + struct device *dev = handle->dev; + + mutex_lock(&iommu_sva_lock); + if (--domain->users == 0) { + iommu_detach_device_pasid(domain, dev, pasid); + iommu_domain_free(domain); + } + mutex_unlock(&iommu_sva_lock); + kfree(handle); +} +EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); + +u32 iommu_sva_get_pasid(struct iommu_sva *handle) +{ + struct iommu_domain *domain = handle->domain; + + return domain->mm->pasid; +} +EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); + +/* + * I/O page fault handler for SVA + */ +enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +{ + vm_fault_t ret; + struct vm_area_struct *vma; + struct mm_struct *mm = data; + unsigned int access_flags = 0; + unsigned int fault_flags = FAULT_FLAG_REMOTE; + struct iommu_fault_page_request *prm = &fault->prm; + enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID; + + if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID)) + return status; + + if (!mmget_not_zero(mm)) + return status; + + mmap_read_lock(mm); + + vma = find_extend_vma(mm, prm->addr); + if (!vma) + /* Unmapped area */ + goto out_put_mm; + + if (prm->perm & IOMMU_FAULT_PERM_READ) + access_flags |= VM_READ; + + if (prm->perm & IOMMU_FAULT_PERM_WRITE) { + access_flags |= VM_WRITE; + fault_flags |= FAULT_FLAG_WRITE; + } + + if (prm->perm & IOMMU_FAULT_PERM_EXEC) { + access_flags |= VM_EXEC; + fault_flags |= FAULT_FLAG_INSTRUCTION; + } + + if (!(prm->perm & IOMMU_FAULT_PERM_PRIV)) + fault_flags |= FAULT_FLAG_USER; + + if (access_flags & ~vma->vm_flags) + /* Access fault */ + goto out_put_mm; + + ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL); + status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID : + IOMMU_PAGE_RESP_SUCCESS; + +out_put_mm: + mmap_read_unlock(mm); + mmput(mm); + + return status; +} diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva.h similarity index 83% rename from drivers/iommu/iommu-sva-lib.h rename to drivers/iommu/iommu-sva.h index 8909ea1094e3a842a92e289a8573cae7859c47fa..7215a761b96282c4e22e2e0b74b3d635472d0a3e 100644 --- a/drivers/iommu/iommu-sva-lib.h +++ b/drivers/iommu/iommu-sva.h @@ -2,8 +2,8 @@ /* * SVA library for IOMMU drivers */ -#ifndef _IOMMU_SVA_LIB_H -#define _IOMMU_SVA_LIB_H +#ifndef _IOMMU_SVA_H +#define _IOMMU_SVA_H #include #include @@ -26,6 +26,8 @@ int iopf_queue_flush_dev(struct device *dev); struct iopf_queue *iopf_queue_alloc(const char *name); void iopf_queue_free(struct iopf_queue *queue); int iopf_queue_discard_partial(struct iopf_queue *queue); +enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data); #else /* CONFIG_IOMMU_SVA */ static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie) @@ -63,5 +65,11 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue) { return -ENODEV; } + +static inline enum iommu_page_response_code +iommu_sva_handle_iopf(struct iommu_fault *fault, void *data) +{ + return IOMMU_PAGE_RESP_INVALID; +} #endif /* CONFIG_IOMMU_SVA */ -#endif /* _IOMMU_SVA_LIB_H */ +#endif /* _IOMMU_SVA_H */ diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 65a3b3d886dc0092ddf2a73735d7bd06de7d6f66..d69ebba81bebd8d8101d0ce9febbe21e5cd27e2f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -28,9 +29,12 @@ #include #include #include +#include #include "dma-iommu.h" +#include "iommu-sva.h" + static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); @@ -42,6 +46,7 @@ struct iommu_group { struct kobject kobj; struct kobject *devices_kobj; struct list_head devices; + struct xarray pasid_array; struct mutex mutex; void *iommu_data; void (*iommu_data_release)(void *iommu_data); @@ -278,6 +283,24 @@ static void dev_iommu_free(struct device *dev) kfree(param); } +static u32 dev_iommu_get_max_pasids(struct device *dev) +{ + u32 max_pasids = 0, bits = 0; + int ret; + + if (dev_is_pci(dev)) { + ret = pci_max_pasids(to_pci_dev(dev)); + if (ret > 0) + max_pasids = ret; + } else { + ret = device_property_read_u32(dev, "pasid-num-bits", &bits); + if (!ret) + max_pasids = 1UL << bits; + } + + return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids); +} + static int __iommu_probe_device(struct device *dev, struct list_head *group_list) { const struct iommu_ops *ops = dev->bus->iommu_ops; @@ -303,6 +326,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list } dev->iommu->iommu_dev = iommu_dev; + dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev); group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) { @@ -703,6 +727,7 @@ struct iommu_group *iommu_group_alloc(void) mutex_init(&group->mutex); INIT_LIST_HEAD(&group->devices); INIT_LIST_HEAD(&group->entry); + xa_init(&group->pasid_array); ret = ida_alloc(&iommu_group_ida, GFP_KERNEL); if (ret < 0) { @@ -1912,6 +1937,8 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc); void iommu_domain_free(struct iommu_domain *domain) { + if (domain->type == IOMMU_DOMAIN_SVA) + mmdrop(domain->mm); iommu_put_dma_cookie(domain); domain->ops->free(domain); } @@ -1949,6 +1976,18 @@ static int __iommu_attach_device(struct iommu_domain *domain, return ret; } +/** + * iommu_attach_device - Attach an IOMMU domain to a device + * @domain: IOMMU domain to attach + * @dev: Device that will be attached + * + * Returns 0 on success and error code on failure + * + * Note that EINVAL can be treated as a soft failure, indicating + * that certain configuration of the domain is incompatible with + * the device. In this case attaching a different domain to the + * device may succeed. + */ int iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct iommu_group *group; @@ -2075,6 +2114,18 @@ static int __iommu_attach_group(struct iommu_domain *domain, return ret; } +/** + * iommu_attach_group - Attach an IOMMU domain to an IOMMU group + * @domain: IOMMU domain to attach + * @group: IOMMU group that will be attached + * + * Returns 0 on success and error code on failure + * + * Note that EINVAL can be treated as a soft failure, indicating + * that certain configuration of the domain is incompatible with + * the group. In this case attaching a different domain to the + * group may succeed. + */ int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group) { int ret; @@ -2726,98 +2777,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat) } EXPORT_SYMBOL_GPL(iommu_dev_disable_feature); -/** - * iommu_sva_bind_device() - Bind a process address space to a device - * @dev: the device - * @mm: the mm to bind, caller must hold a reference to it - * @drvdata: opaque data pointer to pass to bind callback - * - * Create a bond between device and address space, allowing the device to access - * the mm using the returned PASID. If a bond already exists between @device and - * @mm, it is returned and an additional reference is taken. Caller must call - * iommu_sva_unbind_device() to release each reference. - * - * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to - * initialize the required SVA features. - * - * On error, returns an ERR_PTR value. - */ -struct iommu_sva * -iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata) -{ - struct iommu_group *group; - struct iommu_sva *handle = ERR_PTR(-EINVAL); - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->sva_bind) - return ERR_PTR(-ENODEV); - - group = iommu_group_get(dev); - if (!group) - return ERR_PTR(-ENODEV); - - /* Ensure device count and domain don't change while we're binding */ - mutex_lock(&group->mutex); - - /* - * To keep things simple, SVA currently doesn't support IOMMU groups - * with more than one device. Existing SVA-capable systems are not - * affected by the problems that required IOMMU groups (lack of ACS - * isolation, device ID aliasing and other hardware issues). - */ - if (iommu_group_device_count(group) != 1) - goto out_unlock; - - handle = ops->sva_bind(dev, mm, drvdata); - -out_unlock: - mutex_unlock(&group->mutex); - iommu_group_put(group); - - return handle; -} -EXPORT_SYMBOL_GPL(iommu_sva_bind_device); - -/** - * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device - * @handle: the handle returned by iommu_sva_bind_device() - * - * Put reference to a bond between device and address space. The device should - * not be issuing any more transaction for this PASID. All outstanding page - * requests for this PASID must have been flushed to the IOMMU. - */ -void iommu_sva_unbind_device(struct iommu_sva *handle) -{ - struct iommu_group *group; - struct device *dev = handle->dev; - const struct iommu_ops *ops = dev_iommu_ops(dev); - - if (!ops->sva_unbind) - return; - - group = iommu_group_get(dev); - if (!group) - return; - - mutex_lock(&group->mutex); - ops->sva_unbind(handle); - mutex_unlock(&group->mutex); - - iommu_group_put(group); -} -EXPORT_SYMBOL_GPL(iommu_sva_unbind_device); - -u32 iommu_sva_get_pasid(struct iommu_sva *handle) -{ - const struct iommu_ops *ops = dev_iommu_ops(handle->dev); - - if (!ops->sva_get_pasid) - return IOMMU_PASID_INVALID; - - return ops->sva_get_pasid(handle); -} -EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); - /* * Changes the default domain of an iommu group that has *only* one device * @@ -3087,7 +3046,8 @@ int iommu_device_use_default_domain(struct device *dev) mutex_lock(&group->mutex); if (group->owner_cnt) { - if (group->owner || !iommu_is_default_domain(group)) { + if (group->owner || !iommu_is_default_domain(group) || + !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; } @@ -3118,7 +3078,7 @@ void iommu_device_unuse_default_domain(struct device *dev) return; mutex_lock(&group->mutex); - if (!WARN_ON(!group->owner_cnt)) + if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array))) group->owner_cnt--; mutex_unlock(&group->mutex); @@ -3148,40 +3108,49 @@ static int __iommu_group_alloc_blocking_domain(struct iommu_group *group) return 0; } +static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner) +{ + int ret; + + if ((group->domain && group->domain != group->default_domain) || + !xa_empty(&group->pasid_array)) + return -EBUSY; + + ret = __iommu_group_alloc_blocking_domain(group); + if (ret) + return ret; + ret = __iommu_group_set_domain(group, group->blocking_domain); + if (ret) + return ret; + + group->owner = owner; + group->owner_cnt++; + return 0; +} + /** * iommu_group_claim_dma_owner() - Set DMA ownership of a group * @group: The group. * @owner: Caller specified pointer. Used for exclusive ownership. * - * This is to support backward compatibility for vfio which manages - * the dma ownership in iommu_group level. New invocations on this - * interface should be prohibited. + * This is to support backward compatibility for vfio which manages the dma + * ownership in iommu_group level. New invocations on this interface should be + * prohibited. Only a single owner may exist for a group. */ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner) { int ret = 0; + if (WARN_ON(!owner)) + return -EINVAL; + mutex_lock(&group->mutex); if (group->owner_cnt) { ret = -EPERM; goto unlock_out; - } else { - if (group->domain && group->domain != group->default_domain) { - ret = -EBUSY; - goto unlock_out; - } - - ret = __iommu_group_alloc_blocking_domain(group); - if (ret) - goto unlock_out; - - ret = __iommu_group_set_domain(group, group->blocking_domain); - if (ret) - goto unlock_out; - group->owner = owner; } - group->owner_cnt++; + ret = __iommu_take_dma_ownership(group, owner); unlock_out: mutex_unlock(&group->mutex); @@ -3190,29 +3159,91 @@ unlock_out: EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner); /** - * iommu_group_release_dma_owner() - Release DMA ownership of a group - * @group: The group. + * iommu_device_claim_dma_owner() - Set DMA ownership of a device + * @dev: The device. + * @owner: Caller specified pointer. Used for exclusive ownership. * - * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). + * Claim the DMA ownership of a device. Multiple devices in the same group may + * concurrently claim ownership if they present the same owner value. Returns 0 + * on success and error code on failure */ -void iommu_group_release_dma_owner(struct iommu_group *group) +int iommu_device_claim_dma_owner(struct device *dev, void *owner) { - int ret; + struct iommu_group *group = iommu_group_get(dev); + int ret = 0; + + if (!group) + return -ENODEV; + if (WARN_ON(!owner)) + return -EINVAL; mutex_lock(&group->mutex); - if (WARN_ON(!group->owner_cnt || !group->owner)) + if (group->owner_cnt) { + if (group->owner != owner) { + ret = -EPERM; + goto unlock_out; + } + group->owner_cnt++; goto unlock_out; + } + + ret = __iommu_take_dma_ownership(group, owner); +unlock_out: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner); + +static void __iommu_release_dma_ownership(struct iommu_group *group) +{ + int ret; + + if (WARN_ON(!group->owner_cnt || !group->owner || + !xa_empty(&group->pasid_array))) + return; group->owner_cnt = 0; group->owner = NULL; ret = __iommu_group_set_domain(group, group->default_domain); WARN(ret, "iommu driver failed to attach the default domain"); +} -unlock_out: +/** + * iommu_group_release_dma_owner() - Release DMA ownership of a group + * @dev: The device + * + * Release the DMA ownership claimed by iommu_group_claim_dma_owner(). + */ +void iommu_group_release_dma_owner(struct iommu_group *group) +{ + mutex_lock(&group->mutex); + __iommu_release_dma_ownership(group); mutex_unlock(&group->mutex); } EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner); +/** + * iommu_device_release_dma_owner() - Release DMA ownership of a device + * @group: The device. + * + * Release the DMA ownership claimed by iommu_device_claim_dma_owner(). + */ +void iommu_device_release_dma_owner(struct device *dev) +{ + struct iommu_group *group = iommu_group_get(dev); + + mutex_lock(&group->mutex); + if (group->owner_cnt > 1) + group->owner_cnt--; + else + __iommu_release_dma_ownership(group); + mutex_unlock(&group->mutex); + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner); + /** * iommu_group_dma_owner_claimed() - Query group dma ownership status * @group: The group. @@ -3231,3 +3262,150 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group) return user; } EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed); + +static int __iommu_set_group_pasid(struct iommu_domain *domain, + struct iommu_group *group, ioasid_t pasid) +{ + struct group_device *device; + int ret = 0; + + list_for_each_entry(device, &group->devices, list) { + ret = domain->ops->set_dev_pasid(domain, device->dev, pasid); + if (ret) + break; + } + + return ret; +} + +static void __iommu_remove_group_pasid(struct iommu_group *group, + ioasid_t pasid) +{ + struct group_device *device; + const struct iommu_ops *ops; + + list_for_each_entry(device, &group->devices, list) { + ops = dev_iommu_ops(device->dev); + ops->remove_dev_pasid(device->dev, pasid); + } +} + +/* + * iommu_attach_device_pasid() - Attach a domain to pasid of device + * @domain: the iommu domain. + * @dev: the attached device. + * @pasid: the pasid of the device. + * + * Return: 0 on success, or an error. + */ +int iommu_attach_device_pasid(struct iommu_domain *domain, + struct device *dev, ioasid_t pasid) +{ + struct iommu_group *group; + void *curr; + int ret; + + if (!domain->ops->set_dev_pasid) + return -EOPNOTSUPP; + + group = iommu_group_get(dev); + if (!group) + return -ENODEV; + + mutex_lock(&group->mutex); + curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL); + if (curr) { + ret = xa_err(curr) ? : -EBUSY; + goto out_unlock; + } + + ret = __iommu_set_group_pasid(domain, group, pasid); + if (ret) { + __iommu_remove_group_pasid(group, pasid); + xa_erase(&group->pasid_array, pasid); + } +out_unlock: + mutex_unlock(&group->mutex); + iommu_group_put(group); + + return ret; +} +EXPORT_SYMBOL_GPL(iommu_attach_device_pasid); + +/* + * iommu_detach_device_pasid() - Detach the domain from pasid of device + * @domain: the iommu domain. + * @dev: the attached device. + * @pasid: the pasid of the device. + * + * The @domain must have been attached to @pasid of the @dev with + * iommu_attach_device_pasid(). + */ +void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev, + ioasid_t pasid) +{ + struct iommu_group *group = iommu_group_get(dev); + + mutex_lock(&group->mutex); + __iommu_remove_group_pasid(group, pasid); + WARN_ON(xa_erase(&group->pasid_array, pasid) != domain); + mutex_unlock(&group->mutex); + + iommu_group_put(group); +} +EXPORT_SYMBOL_GPL(iommu_detach_device_pasid); + +/* + * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev + * @dev: the queried device + * @pasid: the pasid of the device + * @type: matched domain type, 0 for any match + * + * This is a variant of iommu_get_domain_for_dev(). It returns the existing + * domain attached to pasid of a device. Callers must hold a lock around this + * function, and both iommu_attach/detach_dev_pasid() whenever a domain of + * type is being manipulated. This API does not internally resolve races with + * attach/detach. + * + * Return: attached domain on success, NULL otherwise. + */ +struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev, + ioasid_t pasid, + unsigned int type) +{ + struct iommu_domain *domain; + struct iommu_group *group; + + group = iommu_group_get(dev); + if (!group) + return NULL; + + xa_lock(&group->pasid_array); + domain = xa_load(&group->pasid_array, pasid); + if (type && domain && domain->type != type) + domain = ERR_PTR(-EBUSY); + xa_unlock(&group->pasid_array); + iommu_group_put(group); + + return domain; +} +EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid); + +struct iommu_domain *iommu_sva_domain_alloc(struct device *dev, + struct mm_struct *mm) +{ + const struct iommu_ops *ops = dev_iommu_ops(dev); + struct iommu_domain *domain; + + domain = ops->domain_alloc(IOMMU_DOMAIN_SVA); + if (!domain) + return NULL; + + domain->type = IOMMU_DOMAIN_SVA; + mmgrab(mm); + domain->mm = mm; + domain->iopf_handler = iommu_sva_handle_iopf; + domain->fault_data = mm; + + return domain; +} diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig new file mode 100644 index 0000000000000000000000000000000000000000..8306616b6d819802f1c8f8c1807d35506316bca6 --- /dev/null +++ b/drivers/iommu/iommufd/Kconfig @@ -0,0 +1,44 @@ +# SPDX-License-Identifier: GPL-2.0-only +config IOMMUFD + tristate "IOMMU Userspace API" + select INTERVAL_TREE + select INTERVAL_TREE_SPAN_ITER + select IOMMU_API + default n + help + Provides /dev/iommu, the user API to control the IOMMU subsystem as + it relates to managing IO page tables that point at user space memory. + + If you don't know what to do here, say N. + +if IOMMUFD +config IOMMUFD_VFIO_CONTAINER + bool "IOMMUFD provides the VFIO container /dev/vfio/vfio" + depends on VFIO && !VFIO_CONTAINER + default VFIO && !VFIO_CONTAINER + help + IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on + IOMMUFD providing compatibility emulation to give the same ioctls. + It provides an option to build a kernel with legacy VFIO components + removed. + + IOMMUFD VFIO container emulation is known to lack certain features + of the native VFIO container, such as no-IOMMU support, peer-to-peer + DMA mapping, PPC IOMMU support, as well as other potentially + undiscovered gaps. This option is currently intended for the + purpose of testing IOMMUFD with unmodified userspace supporting VFIO + and making use of the Type1 VFIO IOMMU backend. General purpose + enabling of this option is currently discouraged. + + Unless testing IOMMUFD, say N here. + +config IOMMUFD_TEST + bool "IOMMU Userspace API Test support" + depends on DEBUG_KERNEL + depends on FAULT_INJECTION + depends on RUNTIME_TESTING_MENU + default n + help + This is dangerous, do not enable unless running + tools/testing/selftests/iommu +endif diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..8aeba81800c512dc9e9eb1c32b3240080b503db1 --- /dev/null +++ b/drivers/iommu/iommufd/Makefile @@ -0,0 +1,13 @@ +# SPDX-License-Identifier: GPL-2.0-only +iommufd-y := \ + device.o \ + hw_pagetable.o \ + io_pagetable.o \ + ioas.o \ + main.o \ + pages.o \ + vfio_compat.o + +iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o + +obj-$(CONFIG_IOMMUFD) += iommufd.o diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c new file mode 100644 index 0000000000000000000000000000000000000000..d81f93a321afcb7a79e5e69cb755ff008ab96f6b --- /dev/null +++ b/drivers/iommu/iommufd/device.c @@ -0,0 +1,772 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "iommufd_private.h" + +static bool allow_unsafe_interrupts; +module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC( + allow_unsafe_interrupts, + "Allow IOMMUFD to bind to devices even if the platform cannot isolate " + "the MSI interrupt window. Enabling this is a security weakness."); + +/* + * A iommufd_device object represents the binding relationship between a + * consuming driver and the iommufd. These objects are created/destroyed by + * external drivers, not by userspace. + */ +struct iommufd_device { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommufd_hw_pagetable *hwpt; + /* Head at iommufd_hw_pagetable::devices */ + struct list_head devices_item; + /* always the physical device */ + struct device *dev; + struct iommu_group *group; + bool enforce_cache_coherency; +}; + +void iommufd_device_destroy(struct iommufd_object *obj) +{ + struct iommufd_device *idev = + container_of(obj, struct iommufd_device, obj); + + iommu_device_release_dma_owner(idev->dev); + iommu_group_put(idev->group); + iommufd_ctx_put(idev->ictx); +} + +/** + * iommufd_device_bind - Bind a physical device to an iommu fd + * @ictx: iommufd file descriptor + * @dev: Pointer to a physical device struct + * @id: Output ID number to return to userspace for this device + * + * A successful bind establishes an ownership over the device and returns + * struct iommufd_device pointer, otherwise returns error pointer. + * + * A driver using this API must set driver_managed_dma and must not touch + * the device until this routine succeeds and establishes ownership. + * + * Binding a PCI device places the entire RID under iommufd control. + * + * The caller must undo this with iommufd_device_unbind() + */ +struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx, + struct device *dev, u32 *id) +{ + struct iommufd_device *idev; + struct iommu_group *group; + int rc; + + /* + * iommufd always sets IOMMU_CACHE because we offer no way for userspace + * to restore cache coherency. + */ + if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) + return ERR_PTR(-EINVAL); + + group = iommu_group_get(dev); + if (!group) + return ERR_PTR(-ENODEV); + + rc = iommu_device_claim_dma_owner(dev, ictx); + if (rc) + goto out_group_put; + + idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE); + if (IS_ERR(idev)) { + rc = PTR_ERR(idev); + goto out_release_owner; + } + idev->ictx = ictx; + iommufd_ctx_get(ictx); + idev->dev = dev; + idev->enforce_cache_coherency = + device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY); + /* The calling driver is a user until iommufd_device_unbind() */ + refcount_inc(&idev->obj.users); + /* group refcount moves into iommufd_device */ + idev->group = group; + + /* + * If the caller fails after this success it must call + * iommufd_unbind_device() which is safe since we hold this refcount. + * This also means the device is a leaf in the graph and no other object + * can take a reference on it. + */ + iommufd_object_finalize(ictx, &idev->obj); + *id = idev->obj.id; + return idev; + +out_release_owner: + iommu_device_release_dma_owner(dev); +out_group_put: + iommu_group_put(group); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD); + +/** + * iommufd_device_unbind - Undo iommufd_device_bind() + * @idev: Device returned by iommufd_device_bind() + * + * Release the device from iommufd control. The DMA ownership will return back + * to unowned with DMA controlled by the DMA API. This invalidates the + * iommufd_device pointer, other APIs that consume it must not be called + * concurrently. + */ +void iommufd_device_unbind(struct iommufd_device *idev) +{ + bool was_destroyed; + + was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj); + WARN_ON(!was_destroyed); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD); + +static int iommufd_device_setup_msi(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt, + phys_addr_t sw_msi_start) +{ + int rc; + + /* + * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to + * call iommu_get_msi_cookie() on its behalf. This is necessary to setup + * the MSI window so iommu_dma_prepare_msi() can install pages into our + * domain after request_irq(). If it is not done interrupts will not + * work on this domain. + * + * FIXME: This is conceptually broken for iommufd since we want to allow + * userspace to change the domains, eg switch from an identity IOAS to a + * DMA IOAS. There is currently no way to create a MSI window that + * matches what the IRQ layer actually expects in a newly created + * domain. + */ + if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) { + rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start); + if (rc) + return rc; + + /* + * iommu_get_msi_cookie() can only be called once per domain, + * it returns -EBUSY on later calls. + */ + hwpt->msi_cookie = true; + } + + /* + * For historical compat with VFIO the insecure interrupt path is + * allowed if the module parameter is set. Insecure means that a MemWr + * operation from the device (eg a simple DMA) cannot trigger an + * interrupt outside this iommufd context. + */ + if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) && + !irq_domain_check_msi_remap()) { + if (!allow_unsafe_interrupts) + return -EPERM; + + dev_warn( + idev->dev, + "MSI interrupts are not secure, they cannot be isolated by the platform. " + "Check that platform features like interrupt remapping are enabled. " + "Use the \"allow_unsafe_interrupts\" module parameter to override\n"); + } + return 0; +} + +static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt, + struct iommu_group *group) +{ + struct iommufd_device *cur_dev; + + list_for_each_entry(cur_dev, &hwpt->devices, devices_item) + if (cur_dev->group == group) + return true; + return false; +} + +static int iommufd_device_do_attach(struct iommufd_device *idev, + struct iommufd_hw_pagetable *hwpt) +{ + phys_addr_t sw_msi_start = PHYS_ADDR_MAX; + int rc; + + mutex_lock(&hwpt->devices_lock); + + /* + * Try to upgrade the domain we have, it is an iommu driver bug to + * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail + * enforce_cache_coherency when there are no devices attached to the + * domain. + */ + if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) { + if (hwpt->domain->ops->enforce_cache_coherency) + hwpt->enforce_cache_coherency = + hwpt->domain->ops->enforce_cache_coherency( + hwpt->domain); + if (!hwpt->enforce_cache_coherency) { + WARN_ON(list_empty(&hwpt->devices)); + rc = -EINVAL; + goto out_unlock; + } + } + + rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev, + idev->group, &sw_msi_start); + if (rc) + goto out_unlock; + + rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start); + if (rc) + goto out_iova; + + /* + * FIXME: Hack around missing a device-centric iommu api, only attach to + * the group once for the first device that is in the group. + */ + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { + rc = iommu_attach_group(hwpt->domain, idev->group); + if (rc) + goto out_iova; + + if (list_empty(&hwpt->devices)) { + rc = iopt_table_add_domain(&hwpt->ioas->iopt, + hwpt->domain); + if (rc) + goto out_detach; + } + } + + idev->hwpt = hwpt; + refcount_inc(&hwpt->obj.users); + list_add(&idev->devices_item, &hwpt->devices); + mutex_unlock(&hwpt->devices_lock); + return 0; + +out_detach: + iommu_detach_group(hwpt->domain, idev->group); +out_iova: + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); +out_unlock: + mutex_unlock(&hwpt->devices_lock); + return rc; +} + +/* + * When automatically managing the domains we search for a compatible domain in + * the iopt and if one is found use it, otherwise create a new domain. + * Automatic domain selection will never pick a manually created domain. + */ +static int iommufd_device_auto_get_domain(struct iommufd_device *idev, + struct iommufd_ioas *ioas) +{ + struct iommufd_hw_pagetable *hwpt; + int rc; + + /* + * There is no differentiation when domains are allocated, so any domain + * that is willing to attach to the device is interchangeable with any + * other. + */ + mutex_lock(&ioas->mutex); + list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { + if (!hwpt->auto_domain) + continue; + + rc = iommufd_device_do_attach(idev, hwpt); + + /* + * -EINVAL means the domain is incompatible with the device. + * Other error codes should propagate to userspace as failure. + * Success means the domain is attached. + */ + if (rc == -EINVAL) + continue; + goto out_unlock; + } + + hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev->dev); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_unlock; + } + hwpt->auto_domain = true; + + rc = iommufd_device_do_attach(idev, hwpt); + if (rc) + goto out_abort; + list_add_tail(&hwpt->hwpt_item, &ioas->hwpt_list); + + mutex_unlock(&ioas->mutex); + iommufd_object_finalize(idev->ictx, &hwpt->obj); + return 0; + +out_abort: + iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj); +out_unlock: + mutex_unlock(&ioas->mutex); + return rc; +} + +/** + * iommufd_device_attach - Connect a device from an iommu_domain + * @idev: device to attach + * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE + * Output the IOMMUFD_OBJ_HW_PAGETABLE ID + * + * This connects the device to an iommu_domain, either automatically or manually + * selected. Once this completes the device could do DMA. + * + * The caller should return the resulting pt_id back to userspace. + * This function is undone by calling iommufd_device_detach(). + */ +int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id) +{ + struct iommufd_object *pt_obj; + int rc; + + pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY); + if (IS_ERR(pt_obj)) + return PTR_ERR(pt_obj); + + switch (pt_obj->type) { + case IOMMUFD_OBJ_HW_PAGETABLE: { + struct iommufd_hw_pagetable *hwpt = + container_of(pt_obj, struct iommufd_hw_pagetable, obj); + + rc = iommufd_device_do_attach(idev, hwpt); + if (rc) + goto out_put_pt_obj; + + mutex_lock(&hwpt->ioas->mutex); + list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list); + mutex_unlock(&hwpt->ioas->mutex); + break; + } + case IOMMUFD_OBJ_IOAS: { + struct iommufd_ioas *ioas = + container_of(pt_obj, struct iommufd_ioas, obj); + + rc = iommufd_device_auto_get_domain(idev, ioas); + if (rc) + goto out_put_pt_obj; + break; + } + default: + rc = -EINVAL; + goto out_put_pt_obj; + } + + refcount_inc(&idev->obj.users); + *pt_id = idev->hwpt->obj.id; + rc = 0; + +out_put_pt_obj: + iommufd_put_object(pt_obj); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD); + +/** + * iommufd_device_detach - Disconnect a device to an iommu_domain + * @idev: device to detach + * + * Undo iommufd_device_attach(). This disconnects the idev from the previously + * attached pt_id. The device returns back to a blocked DMA translation. + */ +void iommufd_device_detach(struct iommufd_device *idev) +{ + struct iommufd_hw_pagetable *hwpt = idev->hwpt; + + mutex_lock(&hwpt->ioas->mutex); + mutex_lock(&hwpt->devices_lock); + list_del(&idev->devices_item); + if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) { + if (list_empty(&hwpt->devices)) { + iopt_table_remove_domain(&hwpt->ioas->iopt, + hwpt->domain); + list_del(&hwpt->hwpt_item); + } + iommu_detach_group(hwpt->domain, idev->group); + } + iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev); + mutex_unlock(&hwpt->devices_lock); + mutex_unlock(&hwpt->ioas->mutex); + + if (hwpt->auto_domain) + iommufd_object_destroy_user(idev->ictx, &hwpt->obj); + else + refcount_dec(&hwpt->obj.users); + + idev->hwpt = NULL; + + refcount_dec(&idev->obj.users); +} +EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD); + +void iommufd_access_destroy_object(struct iommufd_object *obj) +{ + struct iommufd_access *access = + container_of(obj, struct iommufd_access, obj); + + iopt_remove_access(&access->ioas->iopt, access); + iommufd_ctx_put(access->ictx); + refcount_dec(&access->ioas->obj.users); +} + +/** + * iommufd_access_create - Create an iommufd_access + * @ictx: iommufd file descriptor + * @ioas_id: ID for a IOMMUFD_OBJ_IOAS + * @ops: Driver's ops to associate with the access + * @data: Opaque data to pass into ops functions + * + * An iommufd_access allows a driver to read/write to the IOAS without using + * DMA. The underlying CPU memory can be accessed using the + * iommufd_access_pin_pages() or iommufd_access_rw() functions. + * + * The provided ops are required to use iommufd_access_pin_pages(). + */ +struct iommufd_access * +iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id, + const struct iommufd_access_ops *ops, void *data) +{ + struct iommufd_access *access; + struct iommufd_object *obj; + int rc; + + /* + * There is no uAPI for the access object, but to keep things symmetric + * use the object infrastructure anyhow. + */ + access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS); + if (IS_ERR(access)) + return access; + + access->data = data; + access->ops = ops; + + obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS); + if (IS_ERR(obj)) { + rc = PTR_ERR(obj); + goto out_abort; + } + access->ioas = container_of(obj, struct iommufd_ioas, obj); + iommufd_ref_to_users(obj); + + if (ops->needs_pin_pages) + access->iova_alignment = PAGE_SIZE; + else + access->iova_alignment = 1; + rc = iopt_add_access(&access->ioas->iopt, access); + if (rc) + goto out_put_ioas; + + /* The calling driver is a user until iommufd_access_destroy() */ + refcount_inc(&access->obj.users); + access->ictx = ictx; + iommufd_ctx_get(ictx); + iommufd_object_finalize(ictx, &access->obj); + return access; +out_put_ioas: + refcount_dec(&access->ioas->obj.users); +out_abort: + iommufd_object_abort(ictx, &access->obj); + return ERR_PTR(rc); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD); + +/** + * iommufd_access_destroy - Destroy an iommufd_access + * @access: The access to destroy + * + * The caller must stop using the access before destroying it. + */ +void iommufd_access_destroy(struct iommufd_access *access) +{ + bool was_destroyed; + + was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj); + WARN_ON(!was_destroyed); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD); + +/** + * iommufd_access_notify_unmap - Notify users of an iopt to stop using it + * @iopt: iopt to work on + * @iova: Starting iova in the iopt + * @length: Number of bytes + * + * After this function returns there should be no users attached to the pages + * linked to this iopt that intersect with iova,length. Anyone that has attached + * a user through iopt_access_pages() needs to detach it through + * iommufd_access_unpin_pages() before this function returns. + * + * iommufd_access_destroy() will wait for any outstanding unmap callback to + * complete. Once iommufd_access_destroy() no unmap ops are running or will + * run in the future. Due to this a driver must not create locking that prevents + * unmap to complete while iommufd_access_destroy() is running. + */ +void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, + unsigned long length) +{ + struct iommufd_ioas *ioas = + container_of(iopt, struct iommufd_ioas, iopt); + struct iommufd_access *access; + unsigned long index; + + xa_lock(&ioas->iopt.access_list); + xa_for_each(&ioas->iopt.access_list, index, access) { + if (!iommufd_lock_obj(&access->obj)) + continue; + xa_unlock(&ioas->iopt.access_list); + + access->ops->unmap(access->data, iova, length); + + iommufd_put_object(&access->obj); + xa_lock(&ioas->iopt.access_list); + } + xa_unlock(&ioas->iopt.access_list); +} + +/** + * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages + * @access: IOAS access to act on + * @iova: Starting IOVA + * @length: Number of bytes to access + * + * Return the struct page's. The caller must stop accessing them before calling + * this. The iova/length must exactly match the one provided to access_pages. + */ +void iommufd_access_unpin_pages(struct iommufd_access *access, + unsigned long iova, unsigned long length) +{ + struct io_pagetable *iopt = &access->ioas->iopt; + struct iopt_area_contig_iter iter; + unsigned long last_iova; + struct iopt_area *area; + + if (WARN_ON(!length) || + WARN_ON(check_add_overflow(iova, length - 1, &last_iova))) + return; + + down_read(&iopt->iova_rwsem); + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) + iopt_area_remove_access( + area, iopt_area_iova_to_index(area, iter.cur_iova), + iopt_area_iova_to_index( + area, + min(last_iova, iopt_area_last_iova(area)))); + up_read(&iopt->iova_rwsem); + WARN_ON(!iopt_area_contig_done(&iter)); +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD); + +static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter) +{ + if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE) + return false; + + if (!iopt_area_contig_done(iter) && + (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) % + PAGE_SIZE) != (PAGE_SIZE - 1)) + return false; + return true; +} + +static bool check_area_prot(struct iopt_area *area, unsigned int flags) +{ + if (flags & IOMMUFD_ACCESS_RW_WRITE) + return area->iommu_prot & IOMMU_WRITE; + return area->iommu_prot & IOMMU_READ; +} + +/** + * iommufd_access_pin_pages() - Return a list of pages under the iova + * @access: IOAS access to act on + * @iova: Starting IOVA + * @length: Number of bytes to access + * @out_pages: Output page list + * @flags: IOPMMUFD_ACCESS_RW_* flags + * + * Reads @length bytes starting at iova and returns the struct page * pointers. + * These can be kmap'd by the caller for CPU access. + * + * The caller must perform iommufd_access_unpin_pages() when done to balance + * this. + * + * This API always requires a page aligned iova. This happens naturally if the + * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However + * smaller alignments have corner cases where this API can fail on otherwise + * aligned iova. + */ +int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova, + unsigned long length, struct page **out_pages, + unsigned int flags) +{ + struct io_pagetable *iopt = &access->ioas->iopt; + struct iopt_area_contig_iter iter; + unsigned long last_iova; + struct iopt_area *area; + int rc; + + /* Driver's ops don't support pin_pages */ + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap)) + return -EINVAL; + + if (!length) + return -EINVAL; + if (check_add_overflow(iova, length - 1, &last_iova)) + return -EOVERFLOW; + + down_read(&iopt->iova_rwsem); + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + unsigned long last_index = iopt_area_iova_to_index(area, last); + unsigned long index = + iopt_area_iova_to_index(area, iter.cur_iova); + + if (area->prevent_access || + !iopt_area_contig_is_aligned(&iter)) { + rc = -EINVAL; + goto err_remove; + } + + if (!check_area_prot(area, flags)) { + rc = -EPERM; + goto err_remove; + } + + rc = iopt_area_add_access(area, index, last_index, out_pages, + flags); + if (rc) + goto err_remove; + out_pages += last_index - index + 1; + } + if (!iopt_area_contig_done(&iter)) { + rc = -ENOENT; + goto err_remove; + } + + up_read(&iopt->iova_rwsem); + return 0; + +err_remove: + if (iova < iter.cur_iova) { + last_iova = iter.cur_iova - 1; + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) + iopt_area_remove_access( + area, + iopt_area_iova_to_index(area, iter.cur_iova), + iopt_area_iova_to_index( + area, min(last_iova, + iopt_area_last_iova(area)))); + } + up_read(&iopt->iova_rwsem); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD); + +/** + * iommufd_access_rw - Read or write data under the iova + * @access: IOAS access to act on + * @iova: Starting IOVA + * @data: Kernel buffer to copy to/from + * @length: Number of bytes to access + * @flags: IOMMUFD_ACCESS_RW_* flags + * + * Copy kernel to/from data into the range given by IOVA/length. If flags + * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized + * by changing it into copy_to/from_user(). + */ +int iommufd_access_rw(struct iommufd_access *access, unsigned long iova, + void *data, size_t length, unsigned int flags) +{ + struct io_pagetable *iopt = &access->ioas->iopt; + struct iopt_area_contig_iter iter; + struct iopt_area *area; + unsigned long last_iova; + int rc; + + if (!length) + return -EINVAL; + if (check_add_overflow(iova, length - 1, &last_iova)) + return -EOVERFLOW; + + down_read(&iopt->iova_rwsem); + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + unsigned long bytes = (last - iter.cur_iova) + 1; + + if (area->prevent_access) { + rc = -EINVAL; + goto err_out; + } + + if (!check_area_prot(area, flags)) { + rc = -EPERM; + goto err_out; + } + + rc = iopt_pages_rw_access( + area->pages, iopt_area_start_byte(area, iter.cur_iova), + data, bytes, flags); + if (rc) + goto err_out; + data += bytes; + } + if (!iopt_area_contig_done(&iter)) + rc = -ENOENT; +err_out: + up_read(&iopt->iova_rwsem); + return rc; +} +EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD); + +#ifdef CONFIG_IOMMUFD_TEST +/* + * Creating a real iommufd_device is too hard, bypass creating a iommufd_device + * and go directly to attaching a domain. + */ +struct iommufd_hw_pagetable * +iommufd_device_selftest_attach(struct iommufd_ctx *ictx, + struct iommufd_ioas *ioas, + struct device *mock_dev) +{ + struct iommufd_hw_pagetable *hwpt; + int rc; + + hwpt = iommufd_hw_pagetable_alloc(ictx, ioas, mock_dev); + if (IS_ERR(hwpt)) + return hwpt; + + rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain); + if (rc) + goto out_hwpt; + + refcount_inc(&hwpt->obj.users); + iommufd_object_finalize(ictx, &hwpt->obj); + return hwpt; + +out_hwpt: + iommufd_object_abort_and_destroy(ictx, &hwpt->obj); + return ERR_PTR(rc); +} + +void iommufd_device_selftest_detach(struct iommufd_ctx *ictx, + struct iommufd_hw_pagetable *hwpt) +{ + iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain); + refcount_dec(&hwpt->obj.users); +} +#endif diff --git a/drivers/iommu/iommufd/double_span.h b/drivers/iommu/iommufd/double_span.h new file mode 100644 index 0000000000000000000000000000000000000000..b37aab7488c0faa39fefee35c7aacaea32a0faf0 --- /dev/null +++ b/drivers/iommu/iommufd/double_span.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef __IOMMUFD_DOUBLE_SPAN_H +#define __IOMMUFD_DOUBLE_SPAN_H + +#include + +/* + * This is a variation of the general interval_tree_span_iter that computes the + * spans over the union of two different interval trees. Used ranges are broken + * up and reported based on the tree that provides the interval. The first span + * always takes priority. Like interval_tree_span_iter it is greedy and the same + * value of is_used will not repeat on two iteration cycles. + */ +struct interval_tree_double_span_iter { + struct rb_root_cached *itrees[2]; + struct interval_tree_span_iter spans[2]; + union { + unsigned long start_hole; + unsigned long start_used; + }; + union { + unsigned long last_hole; + unsigned long last_used; + }; + /* 0 = hole, 1 = used span[0], 2 = used span[1], -1 done iteration */ + int is_used; +}; + +void interval_tree_double_span_iter_update( + struct interval_tree_double_span_iter *iter); +void interval_tree_double_span_iter_first( + struct interval_tree_double_span_iter *iter, + struct rb_root_cached *itree1, struct rb_root_cached *itree2, + unsigned long first_index, unsigned long last_index); +void interval_tree_double_span_iter_next( + struct interval_tree_double_span_iter *iter); + +static inline bool +interval_tree_double_span_iter_done(struct interval_tree_double_span_iter *state) +{ + return state->is_used == -1; +} + +#define interval_tree_for_each_double_span(span, itree1, itree2, first_index, \ + last_index) \ + for (interval_tree_double_span_iter_first(span, itree1, itree2, \ + first_index, last_index); \ + !interval_tree_double_span_iter_done(span); \ + interval_tree_double_span_iter_next(span)) + +#endif diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c new file mode 100644 index 0000000000000000000000000000000000000000..43d473989a0667701b93697f1efa9955e1555c1f --- /dev/null +++ b/drivers/iommu/iommufd/hw_pagetable.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include + +#include "iommufd_private.h" + +void iommufd_hw_pagetable_destroy(struct iommufd_object *obj) +{ + struct iommufd_hw_pagetable *hwpt = + container_of(obj, struct iommufd_hw_pagetable, obj); + + WARN_ON(!list_empty(&hwpt->devices)); + + iommu_domain_free(hwpt->domain); + refcount_dec(&hwpt->ioas->obj.users); + mutex_destroy(&hwpt->devices_lock); +} + +/** + * iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device + * @ictx: iommufd context + * @ioas: IOAS to associate the domain with + * @dev: Device to get an iommu_domain for + * + * Allocate a new iommu_domain and return it as a hw_pagetable. + */ +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, + struct device *dev) +{ + struct iommufd_hw_pagetable *hwpt; + int rc; + + hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE); + if (IS_ERR(hwpt)) + return hwpt; + + hwpt->domain = iommu_domain_alloc(dev->bus); + if (!hwpt->domain) { + rc = -ENOMEM; + goto out_abort; + } + + INIT_LIST_HEAD(&hwpt->devices); + INIT_LIST_HEAD(&hwpt->hwpt_item); + mutex_init(&hwpt->devices_lock); + /* Pairs with iommufd_hw_pagetable_destroy() */ + refcount_inc(&ioas->obj.users); + hwpt->ioas = ioas; + return hwpt; + +out_abort: + iommufd_object_abort(ictx, &hwpt->obj); + return ERR_PTR(rc); +} diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c new file mode 100644 index 0000000000000000000000000000000000000000..e0ae72b9e67f86f89ed29ac29c6363f52821c776 --- /dev/null +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -0,0 +1,1216 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + * + * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The + * PFNs can be placed into an iommu_domain, or returned to the caller as a page + * list for access by an in-kernel user. + * + * The datastructure uses the iopt_pages to optimize the storage of the PFNs + * between the domains and xarray. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "double_span.h" + +struct iopt_pages_list { + struct iopt_pages *pages; + struct iopt_area *area; + struct list_head next; + unsigned long start_byte; + unsigned long length; +}; + +struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, + struct io_pagetable *iopt, + unsigned long iova, + unsigned long last_iova) +{ + lockdep_assert_held(&iopt->iova_rwsem); + + iter->cur_iova = iova; + iter->last_iova = last_iova; + iter->area = iopt_area_iter_first(iopt, iova, iova); + if (!iter->area) + return NULL; + if (!iter->area->pages) { + iter->area = NULL; + return NULL; + } + return iter->area; +} + +struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter) +{ + unsigned long last_iova; + + if (!iter->area) + return NULL; + last_iova = iopt_area_last_iova(iter->area); + if (iter->last_iova <= last_iova) + return NULL; + + iter->cur_iova = last_iova + 1; + iter->area = iopt_area_iter_next(iter->area, iter->cur_iova, + iter->last_iova); + if (!iter->area) + return NULL; + if (iter->cur_iova != iopt_area_iova(iter->area) || + !iter->area->pages) { + iter->area = NULL; + return NULL; + } + return iter->area; +} + +static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_used || span->last_hole - span->start_hole < length - 1) + return false; + + span->start_hole = ALIGN(span->start_hole, iova_alignment) | + page_offset; + if (span->start_hole > span->last_hole || + span->last_hole - span->start_hole < length - 1) + return false; + return true; +} + +static bool __alloc_iova_check_used(struct interval_tree_span_iter *span, + unsigned long length, + unsigned long iova_alignment, + unsigned long page_offset) +{ + if (span->is_hole || span->last_used - span->start_used < length - 1) + return false; + + span->start_used = ALIGN(span->start_used, iova_alignment) | + page_offset; + if (span->start_used > span->last_used || + span->last_used - span->start_used < length - 1) + return false; + return true; +} + +/* + * Automatically find a block of IOVA that is not being used and not reserved. + * Does not return a 0 IOVA even if it is valid. + */ +static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova, + unsigned long uptr, unsigned long length) +{ + unsigned long page_offset = uptr % PAGE_SIZE; + struct interval_tree_double_span_iter used_span; + struct interval_tree_span_iter allowed_span; + unsigned long iova_alignment; + + lockdep_assert_held(&iopt->iova_rwsem); + + /* Protect roundup_pow-of_two() from overflow */ + if (length == 0 || length >= ULONG_MAX / 2) + return -EOVERFLOW; + + /* + * Keep alignment present in the uptr when building the IOVA, this + * increases the chance we can map a THP. + */ + if (!uptr) + iova_alignment = roundup_pow_of_two(length); + else + iova_alignment = min_t(unsigned long, + roundup_pow_of_two(length), + 1UL << __ffs64(uptr)); + + if (iova_alignment < iopt->iova_alignment) + return -EINVAL; + + interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree, + PAGE_SIZE, ULONG_MAX - PAGE_SIZE) { + if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) { + allowed_span.start_used = PAGE_SIZE; + allowed_span.last_used = ULONG_MAX - PAGE_SIZE; + allowed_span.is_hole = false; + } + + if (!__alloc_iova_check_used(&allowed_span, length, + iova_alignment, page_offset)) + continue; + + interval_tree_for_each_double_span( + &used_span, &iopt->reserved_itree, &iopt->area_itree, + allowed_span.start_used, allowed_span.last_used) { + if (!__alloc_iova_check_hole(&used_span, length, + iova_alignment, + page_offset)) + continue; + + *iova = used_span.start_hole; + return 0; + } + } + return -ENOSPC; +} + +static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova, + unsigned long length) +{ + unsigned long last; + + lockdep_assert_held(&iopt->iova_rwsem); + + if ((iova & (iopt->iova_alignment - 1))) + return -EINVAL; + + if (check_add_overflow(iova, length - 1, &last)) + return -EOVERFLOW; + + /* No reserved IOVA intersects the range */ + if (iopt_reserved_iter_first(iopt, iova, last)) + return -EINVAL; + + /* Check that there is not already a mapping in the range */ + if (iopt_area_iter_first(iopt, iova, last)) + return -EEXIST; + return 0; +} + +/* + * The area takes a slice of the pages from start_bytes to start_byte + length + */ +static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area, + struct iopt_pages *pages, unsigned long iova, + unsigned long start_byte, unsigned long length, + int iommu_prot) +{ + lockdep_assert_held_write(&iopt->iova_rwsem); + + if ((iommu_prot & IOMMU_WRITE) && !pages->writable) + return -EPERM; + + area->iommu_prot = iommu_prot; + area->page_offset = start_byte % PAGE_SIZE; + if (area->page_offset & (iopt->iova_alignment - 1)) + return -EINVAL; + + area->node.start = iova; + if (check_add_overflow(iova, length - 1, &area->node.last)) + return -EOVERFLOW; + + area->pages_node.start = start_byte / PAGE_SIZE; + if (check_add_overflow(start_byte, length - 1, &area->pages_node.last)) + return -EOVERFLOW; + area->pages_node.last = area->pages_node.last / PAGE_SIZE; + if (WARN_ON(area->pages_node.last >= pages->npages)) + return -EOVERFLOW; + + /* + * The area is inserted with a NULL pages indicating it is not fully + * initialized yet. + */ + area->iopt = iopt; + interval_tree_insert(&area->node, &iopt->area_itree); + return 0; +} + +static int iopt_alloc_area_pages(struct io_pagetable *iopt, + struct list_head *pages_list, + unsigned long length, unsigned long *dst_iova, + int iommu_prot, unsigned int flags) +{ + struct iopt_pages_list *elm; + unsigned long iova; + int rc = 0; + + list_for_each_entry(elm, pages_list, next) { + elm->area = kzalloc(sizeof(*elm->area), GFP_KERNEL_ACCOUNT); + if (!elm->area) + return -ENOMEM; + } + + down_write(&iopt->iova_rwsem); + if ((length & (iopt->iova_alignment - 1)) || !length) { + rc = -EINVAL; + goto out_unlock; + } + + if (flags & IOPT_ALLOC_IOVA) { + /* Use the first entry to guess the ideal IOVA alignment */ + elm = list_first_entry(pages_list, struct iopt_pages_list, + next); + rc = iopt_alloc_iova( + iopt, dst_iova, + (uintptr_t)elm->pages->uptr + elm->start_byte, length); + if (rc) + goto out_unlock; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) { + rc = -EINVAL; + goto out_unlock; + } + } else { + rc = iopt_check_iova(iopt, *dst_iova, length); + if (rc) + goto out_unlock; + } + + /* + * Areas are created with a NULL pages so that the IOVA space is + * reserved and we can unlock the iova_rwsem. + */ + iova = *dst_iova; + list_for_each_entry(elm, pages_list, next) { + rc = iopt_insert_area(iopt, elm->area, elm->pages, iova, + elm->start_byte, elm->length, iommu_prot); + if (rc) + goto out_unlock; + iova += elm->length; + } + +out_unlock: + up_write(&iopt->iova_rwsem); + return rc; +} + +static void iopt_abort_area(struct iopt_area *area) +{ + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(area->pages); + if (area->iopt) { + down_write(&area->iopt->iova_rwsem); + interval_tree_remove(&area->node, &area->iopt->area_itree); + up_write(&area->iopt->iova_rwsem); + } + kfree(area); +} + +void iopt_free_pages_list(struct list_head *pages_list) +{ + struct iopt_pages_list *elm; + + while ((elm = list_first_entry_or_null(pages_list, + struct iopt_pages_list, next))) { + if (elm->area) + iopt_abort_area(elm->area); + if (elm->pages) + iopt_put_pages(elm->pages); + list_del(&elm->next); + kfree(elm); + } +} + +static int iopt_fill_domains_pages(struct list_head *pages_list) +{ + struct iopt_pages_list *undo_elm; + struct iopt_pages_list *elm; + int rc; + + list_for_each_entry(elm, pages_list, next) { + rc = iopt_area_fill_domains(elm->area, elm->pages); + if (rc) + goto err_undo; + } + return 0; + +err_undo: + list_for_each_entry(undo_elm, pages_list, next) { + if (undo_elm == elm) + break; + iopt_area_unfill_domains(undo_elm->area, undo_elm->pages); + } + return rc; +} + +int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, + unsigned long length, unsigned long *dst_iova, + int iommu_prot, unsigned int flags) +{ + struct iopt_pages_list *elm; + int rc; + + rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova, + iommu_prot, flags); + if (rc) + return rc; + + down_read(&iopt->domains_rwsem); + rc = iopt_fill_domains_pages(pages_list); + if (rc) + goto out_unlock_domains; + + down_write(&iopt->iova_rwsem); + list_for_each_entry(elm, pages_list, next) { + /* + * area->pages must be set inside the domains_rwsem to ensure + * any newly added domains will get filled. Moves the reference + * in from the list. + */ + elm->area->pages = elm->pages; + elm->pages = NULL; + elm->area = NULL; + } + up_write(&iopt->iova_rwsem); +out_unlock_domains: + up_read(&iopt->domains_rwsem); + return rc; +} + +/** + * iopt_map_user_pages() - Map a user VA to an iova in the io page table + * @ictx: iommufd_ctx the iopt is part of + * @iopt: io_pagetable to act on + * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains + * the chosen iova on output. Otherwise is the iova to map to on input + * @uptr: User VA to map + * @length: Number of bytes to map + * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping + * @flags: IOPT_ALLOC_IOVA or zero + * + * iova, uptr, and length must be aligned to iova_alignment. For domain backed + * page tables this will pin the pages and load them into the domain at iova. + * For non-domain page tables this will only setup a lazy reference and the + * caller must use iopt_access_pages() to touch them. + * + * iopt_unmap_iova() must be called to undo this before the io_pagetable can be + * destroyed. + */ +int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, + unsigned long *iova, void __user *uptr, + unsigned long length, int iommu_prot, + unsigned int flags) +{ + struct iopt_pages_list elm = {}; + LIST_HEAD(pages_list); + int rc; + + elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE); + if (IS_ERR(elm.pages)) + return PTR_ERR(elm.pages); + if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM && + elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER) + elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM; + elm.start_byte = uptr - elm.pages->uptr; + elm.length = length; + list_add(&elm.next, &pages_list); + + rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags); + if (rc) { + if (elm.area) + iopt_abort_area(elm.area); + if (elm.pages) + iopt_put_pages(elm.pages); + return rc; + } + return 0; +} + +int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, struct list_head *pages_list) +{ + struct iopt_area_contig_iter iter; + unsigned long last_iova; + struct iopt_area *area; + int rc; + + if (!length) + return -EINVAL; + if (check_add_overflow(iova, length - 1, &last_iova)) + return -EOVERFLOW; + + down_read(&iopt->iova_rwsem); + iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) { + struct iopt_pages_list *elm; + unsigned long last = min(last_iova, iopt_area_last_iova(area)); + + elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT); + if (!elm) { + rc = -ENOMEM; + goto err_free; + } + elm->start_byte = iopt_area_start_byte(area, iter.cur_iova); + elm->pages = area->pages; + elm->length = (last - iter.cur_iova) + 1; + kref_get(&elm->pages->kref); + list_add_tail(&elm->next, pages_list); + } + if (!iopt_area_contig_done(&iter)) { + rc = -ENOENT; + goto err_free; + } + up_read(&iopt->iova_rwsem); + return 0; +err_free: + up_read(&iopt->iova_rwsem); + iopt_free_pages_list(pages_list); + return rc; +} + +static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, + unsigned long last, unsigned long *unmapped) +{ + struct iopt_area *area; + unsigned long unmapped_bytes = 0; + int rc = -ENOENT; + + /* + * The domains_rwsem must be held in read mode any time any area->pages + * is NULL. This prevents domain attach/detatch from running + * concurrently with cleaning up the area. + */ +again: + down_read(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + while ((area = iopt_area_iter_first(iopt, start, last))) { + unsigned long area_last = iopt_area_last_iova(area); + unsigned long area_first = iopt_area_iova(area); + struct iopt_pages *pages; + + /* Userspace should not race map/unmap's of the same area */ + if (!area->pages) { + rc = -EBUSY; + goto out_unlock_iova; + } + + if (area_first < start || area_last > last) { + rc = -ENOENT; + goto out_unlock_iova; + } + + /* + * num_accesses writers must hold the iova_rwsem too, so we can + * safely read it under the write side of the iovam_rwsem + * without the pages->mutex. + */ + if (area->num_accesses) { + start = area_first; + area->prevent_access = true; + up_write(&iopt->iova_rwsem); + up_read(&iopt->domains_rwsem); + iommufd_access_notify_unmap(iopt, area_first, + iopt_area_length(area)); + if (WARN_ON(READ_ONCE(area->num_accesses))) + return -EDEADLOCK; + goto again; + } + + pages = area->pages; + area->pages = NULL; + up_write(&iopt->iova_rwsem); + + iopt_area_unfill_domains(area, pages); + iopt_abort_area(area); + iopt_put_pages(pages); + + unmapped_bytes += area_last - area_first + 1; + + down_write(&iopt->iova_rwsem); + } + if (unmapped_bytes) + rc = 0; + +out_unlock_iova: + up_write(&iopt->iova_rwsem); + up_read(&iopt->domains_rwsem); + if (unmapped) + *unmapped = unmapped_bytes; + return rc; +} + +/** + * iopt_unmap_iova() - Remove a range of iova + * @iopt: io_pagetable to act on + * @iova: Starting iova to unmap + * @length: Number of bytes to unmap + * @unmapped: Return number of bytes unmapped + * + * The requested range must be a superset of existing ranges. + * Splitting/truncating IOVA mappings is not allowed. + */ +int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, unsigned long *unmapped) +{ + unsigned long iova_last; + + if (!length) + return -EINVAL; + + if (check_add_overflow(iova, length - 1, &iova_last)) + return -EOVERFLOW; + + return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped); +} + +int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) +{ + int rc; + + rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); + /* If the IOVAs are empty then unmap all succeeds */ + if (rc == -ENOENT) + return 0; + return rc; +} + +/* The caller must always free all the nodes in the allowed_iova rb_root. */ +int iopt_set_allow_iova(struct io_pagetable *iopt, + struct rb_root_cached *allowed_iova) +{ + struct iopt_allowed *allowed; + + down_write(&iopt->iova_rwsem); + swap(*allowed_iova, iopt->allowed_itree); + + for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed; + allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) { + if (iopt_reserved_iter_first(iopt, allowed->node.start, + allowed->node.last)) { + swap(*allowed_iova, iopt->allowed_itree); + up_write(&iopt->iova_rwsem); + return -EADDRINUSE; + } + } + up_write(&iopt->iova_rwsem); + return 0; +} + +int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, + unsigned long last, void *owner) +{ + struct iopt_reserved *reserved; + + lockdep_assert_held_write(&iopt->iova_rwsem); + + if (iopt_area_iter_first(iopt, start, last) || + iopt_allowed_iter_first(iopt, start, last)) + return -EADDRINUSE; + + reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT); + if (!reserved) + return -ENOMEM; + reserved->node.start = start; + reserved->node.last = last; + reserved->owner = owner; + interval_tree_insert(&reserved->node, &iopt->reserved_itree); + return 0; +} + +static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) +{ + struct iopt_reserved *reserved, *next; + + lockdep_assert_held_write(&iopt->iova_rwsem); + + for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved; + reserved = next) { + next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX); + + if (reserved->owner == owner) { + interval_tree_remove(&reserved->node, + &iopt->reserved_itree); + kfree(reserved); + } + } +} + +void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner) +{ + down_write(&iopt->iova_rwsem); + __iopt_remove_reserved_iova(iopt, owner); + up_write(&iopt->iova_rwsem); +} + +void iopt_init_table(struct io_pagetable *iopt) +{ + init_rwsem(&iopt->iova_rwsem); + init_rwsem(&iopt->domains_rwsem); + iopt->area_itree = RB_ROOT_CACHED; + iopt->allowed_itree = RB_ROOT_CACHED; + iopt->reserved_itree = RB_ROOT_CACHED; + xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT); + xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC); + + /* + * iopt's start as SW tables that can use the entire size_t IOVA space + * due to the use of size_t in the APIs. They have no alignment + * restriction. + */ + iopt->iova_alignment = 1; +} + +void iopt_destroy_table(struct io_pagetable *iopt) +{ + struct interval_tree_node *node; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + iopt_remove_reserved_iova(iopt, NULL); + + while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0, + ULONG_MAX))) { + interval_tree_remove(node, &iopt->allowed_itree); + kfree(container_of(node, struct iopt_allowed, node)); + } + + WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root)); + WARN_ON(!xa_empty(&iopt->domains)); + WARN_ON(!xa_empty(&iopt->access_list)); + WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root)); +} + +/** + * iopt_unfill_domain() - Unfill a domain with PFNs + * @iopt: io_pagetable to act on + * @domain: domain to unfill + * + * This is used when removing a domain from the iopt. Every area in the iopt + * will be unmapped from the domain. The domain must already be removed from the + * domains xarray. + */ +static void iopt_unfill_domain(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + struct iopt_area *area; + + lockdep_assert_held(&iopt->iova_rwsem); + lockdep_assert_held_write(&iopt->domains_rwsem); + + /* + * Some other domain is holding all the pfns still, rapidly unmap this + * domain. + */ + if (iopt->next_domain_id != 0) { + /* Pick an arbitrary remaining domain to act as storage */ + struct iommu_domain *storage_domain = + xa_load(&iopt->domains, 0); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (!pages) + continue; + + mutex_lock(&pages->mutex); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(!area->storage_domain); + if (area->storage_domain == domain) + area->storage_domain = storage_domain; + mutex_unlock(&pages->mutex); + + iopt_area_unmap_domain(area, domain); + } + return; + } + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (!pages) + continue; + + mutex_lock(&pages->mutex); + interval_tree_remove(&area->pages_node, &pages->domains_itree); + WARN_ON(area->storage_domain != domain); + area->storage_domain = NULL; + iopt_area_unfill_domain(area, pages, domain); + mutex_unlock(&pages->mutex); + } +} + +/** + * iopt_fill_domain() - Fill a domain with PFNs + * @iopt: io_pagetable to act on + * @domain: domain to fill + * + * Fill the domain with PFNs from every area in the iopt. On failure the domain + * is left unchanged. + */ +static int iopt_fill_domain(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + struct iopt_area *end_area; + struct iopt_area *area; + int rc; + + lockdep_assert_held(&iopt->iova_rwsem); + lockdep_assert_held_write(&iopt->domains_rwsem); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (!pages) + continue; + + mutex_lock(&pages->mutex); + rc = iopt_area_fill_domain(area, domain); + if (rc) { + mutex_unlock(&pages->mutex); + goto out_unfill; + } + if (!area->storage_domain) { + WARN_ON(iopt->next_domain_id != 0); + area->storage_domain = domain; + interval_tree_insert(&area->pages_node, + &pages->domains_itree); + } + mutex_unlock(&pages->mutex); + } + return 0; + +out_unfill: + end_area = area; + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + struct iopt_pages *pages = area->pages; + + if (area == end_area) + break; + if (!pages) + continue; + mutex_lock(&pages->mutex); + if (iopt->next_domain_id == 0) { + interval_tree_remove(&area->pages_node, + &pages->domains_itree); + area->storage_domain = NULL; + } + iopt_area_unfill_domain(area, pages, domain); + mutex_unlock(&pages->mutex); + } + return rc; +} + +/* All existing area's conform to an increased page size */ +static int iopt_check_iova_alignment(struct io_pagetable *iopt, + unsigned long new_iova_alignment) +{ + unsigned long align_mask = new_iova_alignment - 1; + struct iopt_area *area; + + lockdep_assert_held(&iopt->iova_rwsem); + lockdep_assert_held(&iopt->domains_rwsem); + + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) + if ((iopt_area_iova(area) & align_mask) || + (iopt_area_length(area) & align_mask) || + (area->page_offset & align_mask)) + return -EADDRINUSE; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) { + struct iommufd_access *access; + unsigned long index; + + xa_for_each(&iopt->access_list, index, access) + if (WARN_ON(access->iova_alignment > + new_iova_alignment)) + return -EADDRINUSE; + } + return 0; +} + +int iopt_table_add_domain(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + const struct iommu_domain_geometry *geometry = &domain->geometry; + struct iommu_domain *iter_domain; + unsigned int new_iova_alignment; + unsigned long index; + int rc; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + + xa_for_each(&iopt->domains, index, iter_domain) { + if (WARN_ON(iter_domain == domain)) { + rc = -EEXIST; + goto out_unlock; + } + } + + /* + * The io page size drives the iova_alignment. Internally the iopt_pages + * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE + * objects into the iommu_domain. + * + * A iommu_domain must always be able to accept PAGE_SIZE to be + * compatible as we can't guarantee higher contiguity. + */ + new_iova_alignment = max_t(unsigned long, + 1UL << __ffs(domain->pgsize_bitmap), + iopt->iova_alignment); + if (new_iova_alignment > PAGE_SIZE) { + rc = -EINVAL; + goto out_unlock; + } + if (new_iova_alignment != iopt->iova_alignment) { + rc = iopt_check_iova_alignment(iopt, new_iova_alignment); + if (rc) + goto out_unlock; + } + + /* No area exists that is outside the allowed domain aperture */ + if (geometry->aperture_start != 0) { + rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1, + domain); + if (rc) + goto out_reserved; + } + if (geometry->aperture_end != ULONG_MAX) { + rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1, + ULONG_MAX, domain); + if (rc) + goto out_reserved; + } + + rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL); + if (rc) + goto out_reserved; + + rc = iopt_fill_domain(iopt, domain); + if (rc) + goto out_release; + + iopt->iova_alignment = new_iova_alignment; + xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL); + iopt->next_domain_id++; + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); + return 0; +out_release: + xa_release(&iopt->domains, iopt->next_domain_id); +out_reserved: + __iopt_remove_reserved_iova(iopt, domain); +out_unlock: + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); + return rc; +} + +static int iopt_calculate_iova_alignment(struct io_pagetable *iopt) +{ + unsigned long new_iova_alignment; + struct iommufd_access *access; + struct iommu_domain *domain; + unsigned long index; + + lockdep_assert_held_write(&iopt->iova_rwsem); + lockdep_assert_held(&iopt->domains_rwsem); + + /* See batch_iommu_map_small() */ + if (iopt->disable_large_pages) + new_iova_alignment = PAGE_SIZE; + else + new_iova_alignment = 1; + + xa_for_each(&iopt->domains, index, domain) + new_iova_alignment = max_t(unsigned long, + 1UL << __ffs(domain->pgsize_bitmap), + new_iova_alignment); + xa_for_each(&iopt->access_list, index, access) + new_iova_alignment = max_t(unsigned long, + access->iova_alignment, + new_iova_alignment); + + if (new_iova_alignment > iopt->iova_alignment) { + int rc; + + rc = iopt_check_iova_alignment(iopt, new_iova_alignment); + if (rc) + return rc; + } + iopt->iova_alignment = new_iova_alignment; + return 0; +} + +void iopt_table_remove_domain(struct io_pagetable *iopt, + struct iommu_domain *domain) +{ + struct iommu_domain *iter_domain = NULL; + unsigned long index; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + + xa_for_each(&iopt->domains, index, iter_domain) + if (iter_domain == domain) + break; + if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id) + goto out_unlock; + + /* + * Compress the xarray to keep it linear by swapping the entry to erase + * with the tail entry and shrinking the tail. + */ + iopt->next_domain_id--; + iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id); + if (index != iopt->next_domain_id) + xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL); + + iopt_unfill_domain(iopt, domain); + __iopt_remove_reserved_iova(iopt, domain); + + WARN_ON(iopt_calculate_iova_alignment(iopt)); +out_unlock: + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); +} + +/** + * iopt_area_split - Split an area into two parts at iova + * @area: The area to split + * @iova: Becomes the last of a new area + * + * This splits an area into two. It is part of the VFIO compatibility to allow + * poking a hole in the mapping. The two areas continue to point at the same + * iopt_pages, just with different starting bytes. + */ +static int iopt_area_split(struct iopt_area *area, unsigned long iova) +{ + unsigned long alignment = area->iopt->iova_alignment; + unsigned long last_iova = iopt_area_last_iova(area); + unsigned long start_iova = iopt_area_iova(area); + unsigned long new_start = iova + 1; + struct io_pagetable *iopt = area->iopt; + struct iopt_pages *pages = area->pages; + struct iopt_area *lhs; + struct iopt_area *rhs; + int rc; + + lockdep_assert_held_write(&iopt->iova_rwsem); + + if (iova == start_iova || iova == last_iova) + return 0; + + if (!pages || area->prevent_access) + return -EBUSY; + + if (new_start & (alignment - 1) || + iopt_area_start_byte(area, new_start) & (alignment - 1)) + return -EINVAL; + + lhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); + if (!lhs) + return -ENOMEM; + + rhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT); + if (!rhs) { + rc = -ENOMEM; + goto err_free_lhs; + } + + mutex_lock(&pages->mutex); + /* + * Splitting is not permitted if an access exists, we don't track enough + * information to split existing accesses. + */ + if (area->num_accesses) { + rc = -EINVAL; + goto err_unlock; + } + + /* + * Splitting is not permitted if a domain could have been mapped with + * huge pages. + */ + if (area->storage_domain && !iopt->disable_large_pages) { + rc = -EINVAL; + goto err_unlock; + } + + interval_tree_remove(&area->node, &iopt->area_itree); + rc = iopt_insert_area(iopt, lhs, area->pages, start_iova, + iopt_area_start_byte(area, start_iova), + (new_start - 1) - start_iova + 1, + area->iommu_prot); + if (WARN_ON(rc)) + goto err_insert; + + rc = iopt_insert_area(iopt, rhs, area->pages, new_start, + iopt_area_start_byte(area, new_start), + last_iova - new_start + 1, area->iommu_prot); + if (WARN_ON(rc)) + goto err_remove_lhs; + + lhs->storage_domain = area->storage_domain; + lhs->pages = area->pages; + rhs->storage_domain = area->storage_domain; + rhs->pages = area->pages; + kref_get(&rhs->pages->kref); + kfree(area); + mutex_unlock(&pages->mutex); + + /* + * No change to domains or accesses because the pages hasn't been + * changed + */ + return 0; + +err_remove_lhs: + interval_tree_remove(&lhs->node, &iopt->area_itree); +err_insert: + interval_tree_insert(&area->node, &iopt->area_itree); +err_unlock: + mutex_unlock(&pages->mutex); + kfree(rhs); +err_free_lhs: + kfree(lhs); + return rc; +} + +int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, + size_t num_iovas) +{ + int rc = 0; + int i; + + down_write(&iopt->iova_rwsem); + for (i = 0; i < num_iovas; i++) { + struct iopt_area *area; + + area = iopt_area_iter_first(iopt, iovas[i], iovas[i]); + if (!area) + continue; + rc = iopt_area_split(area, iovas[i]); + if (rc) + break; + } + up_write(&iopt->iova_rwsem); + return rc; +} + +void iopt_enable_large_pages(struct io_pagetable *iopt) +{ + int rc; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + WRITE_ONCE(iopt->disable_large_pages, false); + rc = iopt_calculate_iova_alignment(iopt); + WARN_ON(rc); + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); +} + +int iopt_disable_large_pages(struct io_pagetable *iopt) +{ + int rc = 0; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + if (iopt->disable_large_pages) + goto out_unlock; + + /* Won't do it if domains already have pages mapped in them */ + if (!xa_empty(&iopt->domains) && + !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) { + rc = -EINVAL; + goto out_unlock; + } + + WRITE_ONCE(iopt->disable_large_pages, true); + rc = iopt_calculate_iova_alignment(iopt); + if (rc) + WRITE_ONCE(iopt->disable_large_pages, false); +out_unlock: + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); + return rc; +} + +int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access) +{ + int rc; + + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access, + xa_limit_16b, GFP_KERNEL_ACCOUNT); + if (rc) + goto out_unlock; + + rc = iopt_calculate_iova_alignment(iopt); + if (rc) { + xa_erase(&iopt->access_list, access->iopt_access_list_id); + goto out_unlock; + } + +out_unlock: + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); + return rc; +} + +void iopt_remove_access(struct io_pagetable *iopt, + struct iommufd_access *access) +{ + down_write(&iopt->domains_rwsem); + down_write(&iopt->iova_rwsem); + WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) != + access); + WARN_ON(iopt_calculate_iova_alignment(iopt)); + up_write(&iopt->iova_rwsem); + up_write(&iopt->domains_rwsem); +} + +/* Narrow the valid_iova_itree to include reserved ranges from a group. */ +int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, + struct device *device, + struct iommu_group *group, + phys_addr_t *sw_msi_start) +{ + struct iommu_resv_region *resv; + struct iommu_resv_region *tmp; + LIST_HEAD(group_resv_regions); + unsigned int num_hw_msi = 0; + unsigned int num_sw_msi = 0; + int rc; + + down_write(&iopt->iova_rwsem); + rc = iommu_get_group_resv_regions(group, &group_resv_regions); + if (rc) + goto out_unlock; + + list_for_each_entry(resv, &group_resv_regions, list) { + if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE) + continue; + + if (sw_msi_start && resv->type == IOMMU_RESV_MSI) + num_hw_msi++; + if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) { + *sw_msi_start = resv->start; + num_sw_msi++; + } + + rc = iopt_reserve_iova(iopt, resv->start, + resv->length - 1 + resv->start, device); + if (rc) + goto out_reserved; + } + + /* Drivers must offer sane combinations of regions */ + if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) { + rc = -EINVAL; + goto out_reserved; + } + + rc = 0; + goto out_free_resv; + +out_reserved: + __iopt_remove_reserved_iova(iopt, device); +out_free_resv: + list_for_each_entry_safe(resv, tmp, &group_resv_regions, list) + kfree(resv); +out_unlock: + up_write(&iopt->iova_rwsem); + return rc; +} diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h new file mode 100644 index 0000000000000000000000000000000000000000..0ec3509b7e339243e1c70a62c2bb0300191e5075 --- /dev/null +++ b/drivers/iommu/iommufd/io_pagetable.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + * + */ +#ifndef __IO_PAGETABLE_H +#define __IO_PAGETABLE_H + +#include +#include +#include +#include + +#include "iommufd_private.h" + +struct iommu_domain; + +/* + * Each io_pagetable is composed of intervals of areas which cover regions of + * the iova that are backed by something. iova not covered by areas is not + * populated in the page table. Each area is fully populated with pages. + * + * iovas are in byte units, but must be iopt->iova_alignment aligned. + * + * pages can be NULL, this means some other thread is still working on setting + * up or tearing down the area. When observed under the write side of the + * domain_rwsem a NULL pages must mean the area is still being setup and no + * domains are filled. + * + * storage_domain points at an arbitrary iommu_domain that is holding the PFNs + * for this area. It is locked by the pages->mutex. This simplifies the locking + * as the pages code can rely on the storage_domain without having to get the + * iopt->domains_rwsem. + * + * The io_pagetable::iova_rwsem protects node + * The iopt_pages::mutex protects pages_node + * iopt and iommu_prot are immutable + * The pages::mutex protects num_accesses + */ +struct iopt_area { + struct interval_tree_node node; + struct interval_tree_node pages_node; + struct io_pagetable *iopt; + struct iopt_pages *pages; + struct iommu_domain *storage_domain; + /* How many bytes into the first page the area starts */ + unsigned int page_offset; + /* IOMMU_READ, IOMMU_WRITE, etc */ + int iommu_prot; + bool prevent_access : 1; + unsigned int num_accesses; +}; + +struct iopt_allowed { + struct interval_tree_node node; +}; + +struct iopt_reserved { + struct interval_tree_node node; + void *owner; +}; + +int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages); +void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages); + +int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain); +void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, + struct iommu_domain *domain); +void iopt_area_unmap_domain(struct iopt_area *area, + struct iommu_domain *domain); + +static inline unsigned long iopt_area_index(struct iopt_area *area) +{ + return area->pages_node.start; +} + +static inline unsigned long iopt_area_last_index(struct iopt_area *area) +{ + return area->pages_node.last; +} + +static inline unsigned long iopt_area_iova(struct iopt_area *area) +{ + return area->node.start; +} + +static inline unsigned long iopt_area_last_iova(struct iopt_area *area) +{ + return area->node.last; +} + +static inline size_t iopt_area_length(struct iopt_area *area) +{ + return (area->node.last - area->node.start) + 1; +} + +/* + * Number of bytes from the start of the iopt_pages that the iova begins. + * iopt_area_start_byte() / PAGE_SIZE encodes the starting page index + * iopt_area_start_byte() % PAGE_SIZE encodes the offset within that page + */ +static inline unsigned long iopt_area_start_byte(struct iopt_area *area, + unsigned long iova) +{ + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(iova < iopt_area_iova(area) || + iova > iopt_area_last_iova(area)); + return (iova - iopt_area_iova(area)) + area->page_offset + + iopt_area_index(area) * PAGE_SIZE; +} + +static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area, + unsigned long iova) +{ + return iopt_area_start_byte(area, iova) / PAGE_SIZE; +} + +#define __make_iopt_iter(name) \ + static inline struct iopt_##name *iopt_##name##_iter_first( \ + struct io_pagetable *iopt, unsigned long start, \ + unsigned long last) \ + { \ + struct interval_tree_node *node; \ + \ + lockdep_assert_held(&iopt->iova_rwsem); \ + node = interval_tree_iter_first(&iopt->name##_itree, start, \ + last); \ + if (!node) \ + return NULL; \ + return container_of(node, struct iopt_##name, node); \ + } \ + static inline struct iopt_##name *iopt_##name##_iter_next( \ + struct iopt_##name *last_node, unsigned long start, \ + unsigned long last) \ + { \ + struct interval_tree_node *node; \ + \ + node = interval_tree_iter_next(&last_node->node, start, last); \ + if (!node) \ + return NULL; \ + return container_of(node, struct iopt_##name, node); \ + } + +__make_iopt_iter(area) +__make_iopt_iter(allowed) +__make_iopt_iter(reserved) + +struct iopt_area_contig_iter { + unsigned long cur_iova; + unsigned long last_iova; + struct iopt_area *area; +}; +struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter, + struct io_pagetable *iopt, + unsigned long iova, + unsigned long last_iova); +struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter); + +static inline bool iopt_area_contig_done(struct iopt_area_contig_iter *iter) +{ + return iter->area && iter->last_iova <= iopt_area_last_iova(iter->area); +} + +/* + * Iterate over a contiguous list of areas that span the iova,last_iova range. + * The caller must check iopt_area_contig_done() after the loop to see if + * contiguous areas existed. + */ +#define iopt_for_each_contig_area(iter, area, iopt, iova, last_iova) \ + for (area = iopt_area_contig_init(iter, iopt, iova, last_iova); area; \ + area = iopt_area_contig_next(iter)) + +enum { + IOPT_PAGES_ACCOUNT_NONE = 0, + IOPT_PAGES_ACCOUNT_USER = 1, + IOPT_PAGES_ACCOUNT_MM = 2, +}; + +/* + * This holds a pinned page list for multiple areas of IO address space. The + * pages always originate from a linear chunk of userspace VA. Multiple + * io_pagetable's, through their iopt_area's, can share a single iopt_pages + * which avoids multi-pinning and double accounting of page consumption. + * + * indexes in this structure are measured in PAGE_SIZE units, are 0 based from + * the start of the uptr and extend to npages. pages are pinned dynamically + * according to the intervals in the access_itree and domains_itree, npinned + * records the current number of pages pinned. + */ +struct iopt_pages { + struct kref kref; + struct mutex mutex; + size_t npages; + size_t npinned; + size_t last_npinned; + struct task_struct *source_task; + struct mm_struct *source_mm; + struct user_struct *source_user; + void __user *uptr; + bool writable:1; + u8 account_mode; + + struct xarray pinned_pfns; + /* Of iopt_pages_access::node */ + struct rb_root_cached access_itree; + /* Of iopt_area::pages_node */ + struct rb_root_cached domains_itree; +}; + +struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, + bool writable); +void iopt_release_pages(struct kref *kref); +static inline void iopt_put_pages(struct iopt_pages *pages) +{ + kref_put(&pages->kref, iopt_release_pages); +} + +void iopt_pages_fill_from_xarray(struct iopt_pages *pages, unsigned long start, + unsigned long last, struct page **out_pages); +int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start, + unsigned long last, struct page **out_pages); +void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start, + unsigned long last); + +int iopt_area_add_access(struct iopt_area *area, unsigned long start, + unsigned long last, struct page **out_pages, + unsigned int flags); +void iopt_area_remove_access(struct iopt_area *area, unsigned long start, + unsigned long last); +int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, + void *data, unsigned long length, unsigned int flags); + +/* + * Each interval represents an active iopt_access_pages(), it acts as an + * interval lock that keeps the PFNs pinned and stored in the xarray. + */ +struct iopt_pages_access { + struct interval_tree_node node; + unsigned int users; +}; + +#endif diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c new file mode 100644 index 0000000000000000000000000000000000000000..31577e9d434f878425303d1985c28668270afde4 --- /dev/null +++ b/drivers/iommu/iommufd/ioas.c @@ -0,0 +1,398 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include +#include +#include + +#include "io_pagetable.h" + +void iommufd_ioas_destroy(struct iommufd_object *obj) +{ + struct iommufd_ioas *ioas = container_of(obj, struct iommufd_ioas, obj); + int rc; + + rc = iopt_unmap_all(&ioas->iopt, NULL); + WARN_ON(rc && rc != -ENOENT); + iopt_destroy_table(&ioas->iopt); + mutex_destroy(&ioas->mutex); +} + +struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx) +{ + struct iommufd_ioas *ioas; + + ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS); + if (IS_ERR(ioas)) + return ioas; + + iopt_init_table(&ioas->iopt); + INIT_LIST_HEAD(&ioas->hwpt_list); + mutex_init(&ioas->mutex); + return ioas; +} + +int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_alloc *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + int rc; + + if (cmd->flags) + return -EOPNOTSUPP; + + ioas = iommufd_ioas_alloc(ucmd->ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + cmd->out_ioas_id = ioas->obj.id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_table; + iommufd_object_finalize(ucmd->ictx, &ioas->obj); + return 0; + +out_table: + iommufd_object_abort_and_destroy(ucmd->ictx, &ioas->obj); + return rc; +} + +int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd) +{ + struct iommu_iova_range __user *ranges; + struct iommu_ioas_iova_ranges *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + struct interval_tree_span_iter span; + u32 max_iovas; + int rc; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + down_read(&ioas->iopt.iova_rwsem); + max_iovas = cmd->num_iovas; + ranges = u64_to_user_ptr(cmd->allowed_iovas); + cmd->num_iovas = 0; + cmd->out_iova_alignment = ioas->iopt.iova_alignment; + interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0, + ULONG_MAX) { + if (!span.is_hole) + continue; + if (cmd->num_iovas < max_iovas) { + struct iommu_iova_range elm = { + .start = span.start_hole, + .last = span.last_hole, + }; + + if (copy_to_user(&ranges[cmd->num_iovas], &elm, + sizeof(elm))) { + rc = -EFAULT; + goto out_put; + } + } + cmd->num_iovas++; + } + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_put; + if (cmd->num_iovas > max_iovas) + rc = -EMSGSIZE; +out_put: + up_read(&ioas->iopt.iova_rwsem); + iommufd_put_object(&ioas->obj); + return rc; +} + +static int iommufd_ioas_load_iovas(struct rb_root_cached *itree, + struct iommu_iova_range __user *ranges, + u32 num) +{ + u32 i; + + for (i = 0; i != num; i++) { + struct iommu_iova_range range; + struct iopt_allowed *allowed; + + if (copy_from_user(&range, ranges + i, sizeof(range))) + return -EFAULT; + + if (range.start >= range.last) + return -EINVAL; + + if (interval_tree_iter_first(itree, range.start, range.last)) + return -EINVAL; + + allowed = kzalloc(sizeof(*allowed), GFP_KERNEL_ACCOUNT); + if (!allowed) + return -ENOMEM; + allowed->node.start = range.start; + allowed->node.last = range.last; + + interval_tree_insert(&allowed->node, itree); + } + return 0; +} + +int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_allow_iovas *cmd = ucmd->cmd; + struct rb_root_cached allowed_iova = RB_ROOT_CACHED; + struct interval_tree_node *node; + struct iommufd_ioas *ioas; + struct io_pagetable *iopt; + int rc = 0; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + iopt = &ioas->iopt; + + rc = iommufd_ioas_load_iovas(&allowed_iova, + u64_to_user_ptr(cmd->allowed_iovas), + cmd->num_iovas); + if (rc) + goto out_free; + + /* + * We want the allowed tree update to be atomic, so we have to keep the + * original nodes around, and keep track of the new nodes as we allocate + * memory for them. The simplest solution is to have a new/old tree and + * then swap new for old. On success we free the old tree, on failure we + * free the new tree. + */ + rc = iopt_set_allow_iova(iopt, &allowed_iova); +out_free: + while ((node = interval_tree_iter_first(&allowed_iova, 0, ULONG_MAX))) { + interval_tree_remove(node, &allowed_iova); + kfree(container_of(node, struct iopt_allowed, node)); + } + iommufd_put_object(&ioas->obj); + return rc; +} + +static int conv_iommu_prot(u32 map_flags) +{ + /* + * We provide no manual cache coherency ioctls to userspace and most + * architectures make the CPU ops for cache flushing privileged. + * Therefore we require the underlying IOMMU to support CPU coherent + * operation. Support for IOMMU_CACHE is enforced by the + * IOMMU_CAP_CACHE_COHERENCY test during bind. + */ + int iommu_prot = IOMMU_CACHE; + + if (map_flags & IOMMU_IOAS_MAP_WRITEABLE) + iommu_prot |= IOMMU_WRITE; + if (map_flags & IOMMU_IOAS_MAP_READABLE) + iommu_prot |= IOMMU_READ; + return iommu_prot; +} + +int iommufd_ioas_map(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_map *cmd = ucmd->cmd; + unsigned long iova = cmd->iova; + struct iommufd_ioas *ioas; + unsigned int flags = 0; + int rc; + + if ((cmd->flags & + ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE)) || + cmd->__reserved) + return -EOPNOTSUPP; + if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) + return -EOVERFLOW; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) + flags = IOPT_ALLOC_IOVA; + rc = iopt_map_user_pages(ucmd->ictx, &ioas->iopt, &iova, + u64_to_user_ptr(cmd->user_va), cmd->length, + conv_iommu_prot(cmd->flags), flags); + if (rc) + goto out_put; + + cmd->iova = iova; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_put: + iommufd_put_object(&ioas->obj); + return rc; +} + +int iommufd_ioas_copy(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_copy *cmd = ucmd->cmd; + struct iommufd_ioas *src_ioas; + struct iommufd_ioas *dst_ioas; + unsigned int flags = 0; + LIST_HEAD(pages_list); + unsigned long iova; + int rc; + + iommufd_test_syz_conv_iova_id(ucmd, cmd->src_ioas_id, &cmd->src_iova, + &cmd->flags); + + if ((cmd->flags & + ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE | + IOMMU_IOAS_MAP_READABLE))) + return -EOPNOTSUPP; + if (cmd->length >= ULONG_MAX || cmd->src_iova >= ULONG_MAX || + cmd->dst_iova >= ULONG_MAX) + return -EOVERFLOW; + + src_ioas = iommufd_get_ioas(ucmd, cmd->src_ioas_id); + if (IS_ERR(src_ioas)) + return PTR_ERR(src_ioas); + rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length, + &pages_list); + iommufd_put_object(&src_ioas->obj); + if (rc) + return rc; + + dst_ioas = iommufd_get_ioas(ucmd, cmd->dst_ioas_id); + if (IS_ERR(dst_ioas)) { + rc = PTR_ERR(dst_ioas); + goto out_pages; + } + + if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA)) + flags = IOPT_ALLOC_IOVA; + iova = cmd->dst_iova; + rc = iopt_map_pages(&dst_ioas->iopt, &pages_list, cmd->length, &iova, + conv_iommu_prot(cmd->flags), flags); + if (rc) + goto out_put_dst; + + cmd->dst_iova = iova; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); +out_put_dst: + iommufd_put_object(&dst_ioas->obj); +out_pages: + iopt_free_pages_list(&pages_list); + return rc; +} + +int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) +{ + struct iommu_ioas_unmap *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + unsigned long unmapped = 0; + int rc; + + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + if (cmd->iova == 0 && cmd->length == U64_MAX) { + rc = iopt_unmap_all(&ioas->iopt, &unmapped); + if (rc) + goto out_put; + } else { + if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) { + rc = -EOVERFLOW; + goto out_put; + } + rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length, + &unmapped); + if (rc) + goto out_put; + } + + cmd->length = unmapped; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_put: + iommufd_put_object(&ioas->obj); + return rc; +} + +int iommufd_option_rlimit_mode(struct iommu_option *cmd, + struct iommufd_ctx *ictx) +{ + if (cmd->object_id) + return -EOPNOTSUPP; + + if (cmd->op == IOMMU_OPTION_OP_GET) { + cmd->val64 = ictx->account_mode == IOPT_PAGES_ACCOUNT_MM; + return 0; + } + if (cmd->op == IOMMU_OPTION_OP_SET) { + int rc = 0; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + xa_lock(&ictx->objects); + if (!xa_empty(&ictx->objects)) { + rc = -EBUSY; + } else { + if (cmd->val64 == 0) + ictx->account_mode = IOPT_PAGES_ACCOUNT_USER; + else if (cmd->val64 == 1) + ictx->account_mode = IOPT_PAGES_ACCOUNT_MM; + else + rc = -EINVAL; + } + xa_unlock(&ictx->objects); + + return rc; + } + return -EOPNOTSUPP; +} + +static int iommufd_ioas_option_huge_pages(struct iommu_option *cmd, + struct iommufd_ioas *ioas) +{ + if (cmd->op == IOMMU_OPTION_OP_GET) { + cmd->val64 = !ioas->iopt.disable_large_pages; + return 0; + } + if (cmd->op == IOMMU_OPTION_OP_SET) { + if (cmd->val64 == 0) + return iopt_disable_large_pages(&ioas->iopt); + if (cmd->val64 == 1) { + iopt_enable_large_pages(&ioas->iopt); + return 0; + } + return -EINVAL; + } + return -EOPNOTSUPP; +} + +int iommufd_ioas_option(struct iommufd_ucmd *ucmd) +{ + struct iommu_option *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + int rc = 0; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + ioas = iommufd_get_ioas(ucmd, cmd->object_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + switch (cmd->option_id) { + case IOMMU_OPTION_HUGE_PAGES: + rc = iommufd_ioas_option_huge_pages(cmd, ioas); + break; + default: + rc = -EOPNOTSUPP; + } + + iommufd_put_object(&ioas->obj); + return rc; +} diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h new file mode 100644 index 0000000000000000000000000000000000000000..222e86591f8ac98a5ac11a5e56fb9245d16a35d4 --- /dev/null +++ b/drivers/iommu/iommufd/iommufd_private.h @@ -0,0 +1,307 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#ifndef __IOMMUFD_PRIVATE_H +#define __IOMMUFD_PRIVATE_H + +#include +#include +#include +#include + +struct iommu_domain; +struct iommu_group; +struct iommu_option; + +struct iommufd_ctx { + struct file *file; + struct xarray objects; + + u8 account_mode; + struct iommufd_ioas *vfio_ioas; +}; + +/* + * The IOVA to PFN map. The map automatically copies the PFNs into multiple + * domains and permits sharing of PFNs between io_pagetable instances. This + * supports both a design where IOAS's are 1:1 with a domain (eg because the + * domain is HW customized), or where the IOAS is 1:N with multiple generic + * domains. The io_pagetable holds an interval tree of iopt_areas which point + * to shared iopt_pages which hold the pfns mapped to the page table. + * + * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex + */ +struct io_pagetable { + struct rw_semaphore domains_rwsem; + struct xarray domains; + struct xarray access_list; + unsigned int next_domain_id; + + struct rw_semaphore iova_rwsem; + struct rb_root_cached area_itree; + /* IOVA that cannot become reserved, struct iopt_allowed */ + struct rb_root_cached allowed_itree; + /* IOVA that cannot be allocated, struct iopt_reserved */ + struct rb_root_cached reserved_itree; + u8 disable_large_pages; + unsigned long iova_alignment; +}; + +void iopt_init_table(struct io_pagetable *iopt); +void iopt_destroy_table(struct io_pagetable *iopt); +int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, struct list_head *pages_list); +void iopt_free_pages_list(struct list_head *pages_list); +enum { + IOPT_ALLOC_IOVA = 1 << 0, +}; +int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt, + unsigned long *iova, void __user *uptr, + unsigned long length, int iommu_prot, + unsigned int flags); +int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list, + unsigned long length, unsigned long *dst_iova, + int iommu_prot, unsigned int flags); +int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, + unsigned long length, unsigned long *unmapped); +int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped); + +void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova, + unsigned long length); +int iopt_table_add_domain(struct io_pagetable *iopt, + struct iommu_domain *domain); +void iopt_table_remove_domain(struct io_pagetable *iopt, + struct iommu_domain *domain); +int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt, + struct device *device, + struct iommu_group *group, + phys_addr_t *sw_msi_start); +int iopt_set_allow_iova(struct io_pagetable *iopt, + struct rb_root_cached *allowed_iova); +int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start, + unsigned long last, void *owner); +void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner); +int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas, + size_t num_iovas); +void iopt_enable_large_pages(struct io_pagetable *iopt); +int iopt_disable_large_pages(struct io_pagetable *iopt); + +struct iommufd_ucmd { + struct iommufd_ctx *ictx; + void __user *ubuffer; + u32 user_size; + void *cmd; +}; + +int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, + unsigned long arg); + +/* Copy the response in ucmd->cmd back to userspace. */ +static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd, + size_t cmd_len) +{ + if (copy_to_user(ucmd->ubuffer, ucmd->cmd, + min_t(size_t, ucmd->user_size, cmd_len))) + return -EFAULT; + return 0; +} + +enum iommufd_object_type { + IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE, + IOMMUFD_OBJ_DEVICE, + IOMMUFD_OBJ_HW_PAGETABLE, + IOMMUFD_OBJ_IOAS, + IOMMUFD_OBJ_ACCESS, +#ifdef CONFIG_IOMMUFD_TEST + IOMMUFD_OBJ_SELFTEST, +#endif +}; + +/* Base struct for all objects with a userspace ID handle. */ +struct iommufd_object { + struct rw_semaphore destroy_rwsem; + refcount_t users; + enum iommufd_object_type type; + unsigned int id; +}; + +static inline bool iommufd_lock_obj(struct iommufd_object *obj) +{ + if (!down_read_trylock(&obj->destroy_rwsem)) + return false; + if (!refcount_inc_not_zero(&obj->users)) { + up_read(&obj->destroy_rwsem); + return false; + } + return true; +} + +struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, + enum iommufd_object_type type); +static inline void iommufd_put_object(struct iommufd_object *obj) +{ + refcount_dec(&obj->users); + up_read(&obj->destroy_rwsem); +} + +/** + * iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount + * protection + * @obj - Object to release + * + * Objects have two refcount protections (destroy_rwsem and the refcount_t + * users). Holding either of these will prevent the object from being destroyed. + * + * Depending on the use case, one protection or the other is appropriate. In + * most cases references are being protected by the destroy_rwsem. This allows + * orderly destruction of the object because iommufd_object_destroy_user() will + * wait for it to become unlocked. However, as a rwsem, it cannot be held across + * a system call return. So cases that have longer term needs must switch + * to the weaker users refcount_t. + * + * With users protection iommufd_object_destroy_user() will return false, + * refusing to destroy the object, causing -EBUSY to userspace. + */ +static inline void iommufd_ref_to_users(struct iommufd_object *obj) +{ + up_read(&obj->destroy_rwsem); + /* iommufd_lock_obj() obtains users as well */ +} +void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj); +void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, + struct iommufd_object *obj); +void iommufd_object_finalize(struct iommufd_ctx *ictx, + struct iommufd_object *obj); +bool iommufd_object_destroy_user(struct iommufd_ctx *ictx, + struct iommufd_object *obj); +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type); + +#define iommufd_object_alloc(ictx, ptr, type) \ + container_of(_iommufd_object_alloc( \ + ictx, \ + sizeof(*(ptr)) + BUILD_BUG_ON_ZERO( \ + offsetof(typeof(*(ptr)), \ + obj) != 0), \ + type), \ + typeof(*(ptr)), obj) + +/* + * The IO Address Space (IOAS) pagetable is a virtual page table backed by the + * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The + * mapping is copied into all of the associated domains and made available to + * in-kernel users. + * + * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable + * object. When we go to attach a device to an IOAS we need to get an + * iommu_domain and wrapping iommufd_hw_pagetable for it. + * + * An iommu_domain & iommfd_hw_pagetable will be automatically selected + * for a device based on the hwpt_list. If no suitable iommu_domain + * is found a new iommu_domain will be created. + */ +struct iommufd_ioas { + struct iommufd_object obj; + struct io_pagetable iopt; + struct mutex mutex; + struct list_head hwpt_list; +}; + +static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd, + u32 id) +{ + return container_of(iommufd_get_object(ucmd->ictx, id, + IOMMUFD_OBJ_IOAS), + struct iommufd_ioas, obj); +} + +struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx); +int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd); +void iommufd_ioas_destroy(struct iommufd_object *obj); +int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd); +int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd); +int iommufd_ioas_map(struct iommufd_ucmd *ucmd); +int iommufd_ioas_copy(struct iommufd_ucmd *ucmd); +int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd); +int iommufd_ioas_option(struct iommufd_ucmd *ucmd); +int iommufd_option_rlimit_mode(struct iommu_option *cmd, + struct iommufd_ctx *ictx); + +int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd); + +/* + * A HW pagetable is called an iommu_domain inside the kernel. This user object + * allows directly creating and inspecting the domains. Domains that have kernel + * owned page tables will be associated with an iommufd_ioas that provides the + * IOVA to PFN map. + */ +struct iommufd_hw_pagetable { + struct iommufd_object obj; + struct iommufd_ioas *ioas; + struct iommu_domain *domain; + bool auto_domain : 1; + bool enforce_cache_coherency : 1; + bool msi_cookie : 1; + /* Head at iommufd_ioas::hwpt_list */ + struct list_head hwpt_item; + struct mutex devices_lock; + struct list_head devices; +}; + +struct iommufd_hw_pagetable * +iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas, + struct device *dev); +void iommufd_hw_pagetable_destroy(struct iommufd_object *obj); + +void iommufd_device_destroy(struct iommufd_object *obj); + +struct iommufd_access { + struct iommufd_object obj; + struct iommufd_ctx *ictx; + struct iommufd_ioas *ioas; + const struct iommufd_access_ops *ops; + void *data; + unsigned long iova_alignment; + u32 iopt_access_list_id; +}; + +int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access); +void iopt_remove_access(struct io_pagetable *iopt, + struct iommufd_access *access); +void iommufd_access_destroy_object(struct iommufd_object *obj); + +#ifdef CONFIG_IOMMUFD_TEST +struct iommufd_hw_pagetable * +iommufd_device_selftest_attach(struct iommufd_ctx *ictx, + struct iommufd_ioas *ioas, + struct device *mock_dev); +void iommufd_device_selftest_detach(struct iommufd_ctx *ictx, + struct iommufd_hw_pagetable *hwpt); +int iommufd_test(struct iommufd_ucmd *ucmd); +void iommufd_selftest_destroy(struct iommufd_object *obj); +extern size_t iommufd_test_memory_limit; +void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, u64 *iova, u32 *flags); +bool iommufd_should_fail(void); +void __init iommufd_test_init(void); +void iommufd_test_exit(void); +#else +static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, + u64 *iova, u32 *flags) +{ +} +static inline bool iommufd_should_fail(void) +{ + return false; +} +static inline void __init iommufd_test_init(void) +{ +} +static inline void iommufd_test_exit(void) +{ +} +#endif +#endif diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h new file mode 100644 index 0000000000000000000000000000000000000000..1d96a8f466fd2990667adc826ddb4bdf34534451 --- /dev/null +++ b/drivers/iommu/iommufd/iommufd_test.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef _UAPI_IOMMUFD_TEST_H +#define _UAPI_IOMMUFD_TEST_H + +#include +#include + +enum { + IOMMU_TEST_OP_ADD_RESERVED = 1, + IOMMU_TEST_OP_MOCK_DOMAIN, + IOMMU_TEST_OP_MD_CHECK_MAP, + IOMMU_TEST_OP_MD_CHECK_REFS, + IOMMU_TEST_OP_CREATE_ACCESS, + IOMMU_TEST_OP_DESTROY_ACCESS_PAGES, + IOMMU_TEST_OP_ACCESS_PAGES, + IOMMU_TEST_OP_ACCESS_RW, + IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT, +}; + +enum { + MOCK_APERTURE_START = 1UL << 24, + MOCK_APERTURE_LAST = (1UL << 31) - 1, +}; + +enum { + MOCK_FLAGS_ACCESS_WRITE = 1 << 0, + MOCK_FLAGS_ACCESS_SYZ = 1 << 16, +}; + +enum { + MOCK_ACCESS_RW_WRITE = 1 << 0, + MOCK_ACCESS_RW_SLOW_PATH = 1 << 2, +}; + +enum { + MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES = 1 << 0, +}; + +struct iommu_test_cmd { + __u32 size; + __u32 op; + __u32 id; + __u32 __reserved; + union { + struct { + __aligned_u64 start; + __aligned_u64 length; + } add_reserved; + struct { + __u32 out_device_id; + __u32 out_hwpt_id; + } mock_domain; + struct { + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 uptr; + } check_map; + struct { + __aligned_u64 length; + __aligned_u64 uptr; + __u32 refs; + } check_refs; + struct { + __u32 out_access_fd; + __u32 flags; + } create_access; + struct { + __u32 access_pages_id; + } destroy_access_pages; + struct { + __u32 flags; + __u32 out_access_pages_id; + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 uptr; + } access_pages; + struct { + __aligned_u64 iova; + __aligned_u64 length; + __aligned_u64 uptr; + __u32 flags; + } access_rw; + struct { + __u32 limit; + } memory_limit; + }; + __u32 last; +}; +#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32) + +#endif diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c new file mode 100644 index 0000000000000000000000000000000000000000..083e6fcbe10ad92b5214099da4e7e892734dd8aa --- /dev/null +++ b/drivers/iommu/iommufd/main.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2021 Intel Corporation + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + * + * iommufd provides control over the IOMMU HW objects created by IOMMU kernel + * drivers. IOMMU HW objects revolve around IO page tables that map incoming DMA + * addresses (IOVA) to CPU addresses. + */ +#define pr_fmt(fmt) "iommufd: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "iommufd_private.h" +#include "iommufd_test.h" + +struct iommufd_object_ops { + void (*destroy)(struct iommufd_object *obj); +}; +static const struct iommufd_object_ops iommufd_object_ops[]; +static struct miscdevice vfio_misc_dev; + +struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx, + size_t size, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + int rc; + + obj = kzalloc(size, GFP_KERNEL_ACCOUNT); + if (!obj) + return ERR_PTR(-ENOMEM); + obj->type = type; + init_rwsem(&obj->destroy_rwsem); + refcount_set(&obj->users, 1); + + /* + * Reserve an ID in the xarray but do not publish the pointer yet since + * the caller hasn't initialized it yet. Once the pointer is published + * in the xarray and visible to other threads we can't reliably destroy + * it anymore, so the caller must complete all errorable operations + * before calling iommufd_object_finalize(). + */ + rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY, + xa_limit_32b, GFP_KERNEL_ACCOUNT); + if (rc) + goto out_free; + return obj; +out_free: + kfree(obj); + return ERR_PTR(rc); +} + +/* + * Allow concurrent access to the object. + * + * Once another thread can see the object pointer it can prevent object + * destruction. Expect for special kernel-only objects there is no in-kernel way + * to reliably destroy a single object. Thus all APIs that are creating objects + * must use iommufd_object_abort() to handle their errors and only call + * iommufd_object_finalize() once object creation cannot fail. + */ +void iommufd_object_finalize(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + void *old; + + old = xa_store(&ictx->objects, obj->id, obj, GFP_KERNEL); + /* obj->id was returned from xa_alloc() so the xa_store() cannot fail */ + WARN_ON(old); +} + +/* Undo _iommufd_object_alloc() if iommufd_object_finalize() was not called */ +void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj) +{ + void *old; + + old = xa_erase(&ictx->objects, obj->id); + WARN_ON(old); + kfree(obj); +} + +/* + * Abort an object that has been fully initialized and needs destroy, but has + * not been finalized. + */ +void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + iommufd_object_ops[obj->type].destroy(obj); + iommufd_object_abort(ictx, obj); +} + +struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id, + enum iommufd_object_type type) +{ + struct iommufd_object *obj; + + if (iommufd_should_fail()) + return ERR_PTR(-ENOENT); + + xa_lock(&ictx->objects); + obj = xa_load(&ictx->objects, id); + if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) || + !iommufd_lock_obj(obj)) + obj = ERR_PTR(-ENOENT); + xa_unlock(&ictx->objects); + return obj; +} + +/* + * The caller holds a users refcount and wants to destroy the object. Returns + * true if the object was destroyed. In all cases the caller no longer has a + * reference on obj. + */ +bool iommufd_object_destroy_user(struct iommufd_ctx *ictx, + struct iommufd_object *obj) +{ + /* + * The purpose of the destroy_rwsem is to ensure deterministic + * destruction of objects used by external drivers and destroyed by this + * function. Any temporary increment of the refcount must hold the read + * side of this, such as during ioctl execution. + */ + down_write(&obj->destroy_rwsem); + xa_lock(&ictx->objects); + refcount_dec(&obj->users); + if (!refcount_dec_if_one(&obj->users)) { + xa_unlock(&ictx->objects); + up_write(&obj->destroy_rwsem); + return false; + } + __xa_erase(&ictx->objects, obj->id); + if (ictx->vfio_ioas && &ictx->vfio_ioas->obj == obj) + ictx->vfio_ioas = NULL; + xa_unlock(&ictx->objects); + up_write(&obj->destroy_rwsem); + + iommufd_object_ops[obj->type].destroy(obj); + kfree(obj); + return true; +} + +static int iommufd_destroy(struct iommufd_ucmd *ucmd) +{ + struct iommu_destroy *cmd = ucmd->cmd; + struct iommufd_object *obj; + + obj = iommufd_get_object(ucmd->ictx, cmd->id, IOMMUFD_OBJ_ANY); + if (IS_ERR(obj)) + return PTR_ERR(obj); + iommufd_ref_to_users(obj); + /* See iommufd_ref_to_users() */ + if (!iommufd_object_destroy_user(ucmd->ictx, obj)) + return -EBUSY; + return 0; +} + +static int iommufd_fops_open(struct inode *inode, struct file *filp) +{ + struct iommufd_ctx *ictx; + + ictx = kzalloc(sizeof(*ictx), GFP_KERNEL_ACCOUNT); + if (!ictx) + return -ENOMEM; + + /* + * For compatibility with VFIO when /dev/vfio/vfio is opened we default + * to the same rlimit accounting as vfio uses. + */ + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) && + filp->private_data == &vfio_misc_dev) { + ictx->account_mode = IOPT_PAGES_ACCOUNT_MM; + pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n"); + } + + xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT); + ictx->file = filp; + filp->private_data = ictx; + return 0; +} + +static int iommufd_fops_release(struct inode *inode, struct file *filp) +{ + struct iommufd_ctx *ictx = filp->private_data; + struct iommufd_object *obj; + + /* + * The objects in the xarray form a graph of "users" counts, and we have + * to destroy them in a depth first manner. Leaf objects will reduce the + * users count of interior objects when they are destroyed. + * + * Repeatedly destroying all the "1 users" leaf objects will progress + * until the entire list is destroyed. If this can't progress then there + * is some bug related to object refcounting. + */ + while (!xa_empty(&ictx->objects)) { + unsigned int destroyed = 0; + unsigned long index; + + xa_for_each(&ictx->objects, index, obj) { + if (!refcount_dec_if_one(&obj->users)) + continue; + destroyed++; + xa_erase(&ictx->objects, index); + iommufd_object_ops[obj->type].destroy(obj); + kfree(obj); + } + /* Bug related to users refcount */ + if (WARN_ON(!destroyed)) + break; + } + kfree(ictx); + return 0; +} + +static int iommufd_option(struct iommufd_ucmd *ucmd) +{ + struct iommu_option *cmd = ucmd->cmd; + int rc; + + if (cmd->__reserved) + return -EOPNOTSUPP; + + switch (cmd->option_id) { + case IOMMU_OPTION_RLIMIT_MODE: + rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx); + break; + case IOMMU_OPTION_HUGE_PAGES: + rc = iommufd_ioas_option(ucmd); + break; + default: + return -EOPNOTSUPP; + } + if (rc) + return rc; + if (copy_to_user(&((struct iommu_option __user *)ucmd->ubuffer)->val64, + &cmd->val64, sizeof(cmd->val64))) + return -EFAULT; + return 0; +} + +union ucmd_buffer { + struct iommu_destroy destroy; + struct iommu_ioas_alloc alloc; + struct iommu_ioas_allow_iovas allow_iovas; + struct iommu_ioas_iova_ranges iova_ranges; + struct iommu_ioas_map map; + struct iommu_ioas_unmap unmap; +#ifdef CONFIG_IOMMUFD_TEST + struct iommu_test_cmd test; +#endif +}; + +struct iommufd_ioctl_op { + unsigned int size; + unsigned int min_size; + unsigned int ioctl_num; + int (*execute)(struct iommufd_ucmd *ucmd); +}; + +#define IOCTL_OP(_ioctl, _fn, _struct, _last) \ + [_IOC_NR(_ioctl) - IOMMUFD_CMD_BASE] = { \ + .size = sizeof(_struct) + \ + BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \ + sizeof(_struct)), \ + .min_size = offsetofend(_struct, _last), \ + .ioctl_num = _ioctl, \ + .execute = _fn, \ + } +static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = { + IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id), + IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl, + struct iommu_ioas_alloc, out_ioas_id), + IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas, + struct iommu_ioas_allow_iovas, allowed_iovas), + IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy, + src_iova), + IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges, + struct iommu_ioas_iova_ranges, out_iova_alignment), + IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map, + iova), + IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap, + length), + IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option, + val64), + IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas, + __reserved), +#ifdef CONFIG_IOMMUFD_TEST + IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last), +#endif +}; + +static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct iommufd_ctx *ictx = filp->private_data; + const struct iommufd_ioctl_op *op; + struct iommufd_ucmd ucmd = {}; + union ucmd_buffer buf; + unsigned int nr; + int ret; + + nr = _IOC_NR(cmd); + if (nr < IOMMUFD_CMD_BASE || + (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops)) + return iommufd_vfio_ioctl(ictx, cmd, arg); + + ucmd.ictx = ictx; + ucmd.ubuffer = (void __user *)arg; + ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); + if (ret) + return ret; + + op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE]; + if (op->ioctl_num != cmd) + return -ENOIOCTLCMD; + if (ucmd.user_size < op->min_size) + return -EINVAL; + + ucmd.cmd = &buf; + ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, + ucmd.user_size); + if (ret) + return ret; + ret = op->execute(&ucmd); + return ret; +} + +static const struct file_operations iommufd_fops = { + .owner = THIS_MODULE, + .open = iommufd_fops_open, + .release = iommufd_fops_release, + .unlocked_ioctl = iommufd_fops_ioctl, +}; + +/** + * iommufd_ctx_get - Get a context reference + * @ictx: Context to get + * + * The caller must already hold a valid reference to ictx. + */ +void iommufd_ctx_get(struct iommufd_ctx *ictx) +{ + get_file(ictx->file); +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, IOMMUFD); + +/** + * iommufd_ctx_from_file - Acquires a reference to the iommufd context + * @file: File to obtain the reference from + * + * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. The struct file + * remains owned by the caller and the caller must still do fput. On success + * the caller is responsible to call iommufd_ctx_put(). + */ +struct iommufd_ctx *iommufd_ctx_from_file(struct file *file) +{ + struct iommufd_ctx *ictx; + + if (file->f_op != &iommufd_fops) + return ERR_PTR(-EBADFD); + ictx = file->private_data; + iommufd_ctx_get(ictx); + return ictx; +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD); + +/** + * iommufd_ctx_put - Put back a reference + * @ictx: Context to put back + */ +void iommufd_ctx_put(struct iommufd_ctx *ictx) +{ + fput(ictx->file); +} +EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD); + +static const struct iommufd_object_ops iommufd_object_ops[] = { + [IOMMUFD_OBJ_ACCESS] = { + .destroy = iommufd_access_destroy_object, + }, + [IOMMUFD_OBJ_DEVICE] = { + .destroy = iommufd_device_destroy, + }, + [IOMMUFD_OBJ_IOAS] = { + .destroy = iommufd_ioas_destroy, + }, + [IOMMUFD_OBJ_HW_PAGETABLE] = { + .destroy = iommufd_hw_pagetable_destroy, + }, +#ifdef CONFIG_IOMMUFD_TEST + [IOMMUFD_OBJ_SELFTEST] = { + .destroy = iommufd_selftest_destroy, + }, +#endif +}; + +static struct miscdevice iommu_misc_dev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "iommu", + .fops = &iommufd_fops, + .nodename = "iommu", + .mode = 0660, +}; + + +static struct miscdevice vfio_misc_dev = { + .minor = VFIO_MINOR, + .name = "vfio", + .fops = &iommufd_fops, + .nodename = "vfio/vfio", + .mode = 0666, +}; + +static int __init iommufd_init(void) +{ + int ret; + + ret = misc_register(&iommu_misc_dev); + if (ret) + return ret; + + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) { + ret = misc_register(&vfio_misc_dev); + if (ret) + goto err_misc; + } + iommufd_test_init(); + return 0; +err_misc: + misc_deregister(&iommu_misc_dev); + return ret; +} + +static void __exit iommufd_exit(void) +{ + iommufd_test_exit(); + if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) + misc_deregister(&vfio_misc_dev); + misc_deregister(&iommu_misc_dev); +} + +module_init(iommufd_init); +module_exit(iommufd_exit); + +#if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) +MODULE_ALIAS_MISCDEV(VFIO_MINOR); +MODULE_ALIAS("devname:vfio/vfio"); +#endif +MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices"); +MODULE_LICENSE("GPL"); diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c new file mode 100644 index 0000000000000000000000000000000000000000..1e1d3509efae5e0068a34005f06752555b1a7bbc --- /dev/null +++ b/drivers/iommu/iommufd/pages.c @@ -0,0 +1,1977 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + * + * The iopt_pages is the center of the storage and motion of PFNs. Each + * iopt_pages represents a logical linear array of full PFNs. The array is 0 + * based and has npages in it. Accessors use 'index' to refer to the entry in + * this logical array, regardless of its storage location. + * + * PFNs are stored in a tiered scheme: + * 1) iopt_pages::pinned_pfns xarray + * 2) An iommu_domain + * 3) The origin of the PFNs, i.e. the userspace pointer + * + * PFN have to be copied between all combinations of tiers, depending on the + * configuration. + * + * When a PFN is taken out of the userspace pointer it is pinned exactly once. + * The storage locations of the PFN's index are tracked in the two interval + * trees. If no interval includes the index then it is not pinned. + * + * If access_itree includes the PFN's index then an in-kernel access has + * requested the page. The PFN is stored in the xarray so other requestors can + * continue to find it. + * + * If the domains_itree includes the PFN's index then an iommu_domain is storing + * the PFN and it can be read back using iommu_iova_to_phys(). To avoid + * duplicating storage the xarray is not used if only iommu_domains are using + * the PFN's index. + * + * As a general principle this is designed so that destroy never fails. This + * means removing an iommu_domain or releasing a in-kernel access will not fail + * due to insufficient memory. In practice this means some cases have to hold + * PFNs in the xarray even though they are also being stored in an iommu_domain. + * + * While the iopt_pages can use an iommu_domain as storage, it does not have an + * IOVA itself. Instead the iopt_area represents a range of IOVA and uses the + * iopt_pages as the PFN provider. Multiple iopt_areas can share the iopt_pages + * and reference their own slice of the PFN array, with sub page granularity. + * + * In this file the term 'last' indicates an inclusive and closed interval, eg + * [0,0] refers to a single PFN. 'end' means an open range, eg [0,0) refers to + * no PFNs. + * + * Be cautious of overflow. An IOVA can go all the way up to U64_MAX, so + * last_iova + 1 can overflow. An iopt_pages index will always be much less than + * ULONG_MAX so last_index + 1 cannot overflow. + */ +#include +#include +#include +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "double_span.h" + +#ifndef CONFIG_IOMMUFD_TEST +#define TEMP_MEMORY_LIMIT 65536 +#else +#define TEMP_MEMORY_LIMIT iommufd_test_memory_limit +#endif +#define BATCH_BACKUP_SIZE 32 + +/* + * More memory makes pin_user_pages() and the batching more efficient, but as + * this is only a performance optimization don't try too hard to get it. A 64k + * allocation can hold about 26M of 4k pages and 13G of 2M pages in an + * pfn_batch. Various destroy paths cannot fail and provide a small amount of + * stack memory as a backup contingency. If backup_len is given this cannot + * fail. + */ +static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len) +{ + void *res; + + if (WARN_ON(*size == 0)) + return NULL; + + if (*size < backup_len) + return backup; + + if (!backup && iommufd_should_fail()) + return NULL; + + *size = min_t(size_t, *size, TEMP_MEMORY_LIMIT); + res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (res) + return res; + *size = PAGE_SIZE; + if (backup_len) { + res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (res) + return res; + *size = backup_len; + return backup; + } + return kmalloc(*size, GFP_KERNEL); +} + +void interval_tree_double_span_iter_update( + struct interval_tree_double_span_iter *iter) +{ + unsigned long last_hole = ULONG_MAX; + unsigned int i; + + for (i = 0; i != ARRAY_SIZE(iter->spans); i++) { + if (interval_tree_span_iter_done(&iter->spans[i])) { + iter->is_used = -1; + return; + } + + if (iter->spans[i].is_hole) { + last_hole = min(last_hole, iter->spans[i].last_hole); + continue; + } + + iter->is_used = i + 1; + iter->start_used = iter->spans[i].start_used; + iter->last_used = min(iter->spans[i].last_used, last_hole); + return; + } + + iter->is_used = 0; + iter->start_hole = iter->spans[0].start_hole; + iter->last_hole = + min(iter->spans[0].last_hole, iter->spans[1].last_hole); +} + +void interval_tree_double_span_iter_first( + struct interval_tree_double_span_iter *iter, + struct rb_root_cached *itree1, struct rb_root_cached *itree2, + unsigned long first_index, unsigned long last_index) +{ + unsigned int i; + + iter->itrees[0] = itree1; + iter->itrees[1] = itree2; + for (i = 0; i != ARRAY_SIZE(iter->spans); i++) + interval_tree_span_iter_first(&iter->spans[i], iter->itrees[i], + first_index, last_index); + interval_tree_double_span_iter_update(iter); +} + +void interval_tree_double_span_iter_next( + struct interval_tree_double_span_iter *iter) +{ + unsigned int i; + + if (iter->is_used == -1 || + iter->last_hole == iter->spans[0].last_index) { + iter->is_used = -1; + return; + } + + for (i = 0; i != ARRAY_SIZE(iter->spans); i++) + interval_tree_span_iter_advance( + &iter->spans[i], iter->itrees[i], iter->last_hole + 1); + interval_tree_double_span_iter_update(iter); +} + +static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages) +{ + int rc; + + rc = check_add_overflow(pages->npinned, npages, &pages->npinned); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(rc || pages->npinned > pages->npages); +} + +static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages) +{ + int rc; + + rc = check_sub_overflow(pages->npinned, npages, &pages->npinned); + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(rc || pages->npinned > pages->npages); +} + +static void iopt_pages_err_unpin(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, + struct page **page_list) +{ + unsigned long npages = last_index - start_index + 1; + + unpin_user_pages(page_list, npages); + iopt_pages_sub_npinned(pages, npages); +} + +/* + * index is the number of PAGE_SIZE units from the start of the area's + * iopt_pages. If the iova is sub page-size then the area has an iova that + * covers a portion of the first and last pages in the range. + */ +static unsigned long iopt_area_index_to_iova(struct iopt_area *area, + unsigned long index) +{ + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(index < iopt_area_index(area) || + index > iopt_area_last_index(area)); + index -= iopt_area_index(area); + if (index == 0) + return iopt_area_iova(area); + return iopt_area_iova(area) - area->page_offset + index * PAGE_SIZE; +} + +static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area, + unsigned long index) +{ + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(index < iopt_area_index(area) || + index > iopt_area_last_index(area)); + if (index == iopt_area_last_index(area)) + return iopt_area_last_iova(area); + return iopt_area_iova(area) - area->page_offset + + (index - iopt_area_index(area) + 1) * PAGE_SIZE - 1; +} + +static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + size_t ret; + + ret = iommu_unmap(domain, iova, size); + /* + * It is a logic error in this code or a driver bug if the IOMMU unmaps + * something other than exactly as requested. This implies that the + * iommu driver may not fail unmap for reasons beyond bad agruments. + * Particularly, the iommu driver may not do a memory allocation on the + * unmap path. + */ + WARN_ON(ret != size); +} + +static void iopt_area_unmap_domain_range(struct iopt_area *area, + struct iommu_domain *domain, + unsigned long start_index, + unsigned long last_index) +{ + unsigned long start_iova = iopt_area_index_to_iova(area, start_index); + + iommu_unmap_nofail(domain, start_iova, + iopt_area_index_to_iova_last(area, last_index) - + start_iova + 1); +} + +static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages, + unsigned long index) +{ + struct interval_tree_node *node; + + node = interval_tree_iter_first(&pages->domains_itree, index, index); + if (!node) + return NULL; + return container_of(node, struct iopt_area, pages_node); +} + +/* + * A simple datastructure to hold a vector of PFNs, optimized for contiguous + * PFNs. This is used as a temporary holding memory for shuttling pfns from one + * place to another. Generally everything is made more efficient if operations + * work on the largest possible grouping of pfns. eg fewer lock/unlock cycles, + * better cache locality, etc + */ +struct pfn_batch { + unsigned long *pfns; + u32 *npfns; + unsigned int array_size; + unsigned int end; + unsigned int total_pfns; +}; + +static void batch_clear(struct pfn_batch *batch) +{ + batch->total_pfns = 0; + batch->end = 0; + batch->pfns[0] = 0; + batch->npfns[0] = 0; +} + +/* + * Carry means we carry a portion of the final hugepage over to the front of the + * batch + */ +static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns) +{ + if (!keep_pfns) + return batch_clear(batch); + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(!batch->end || + batch->npfns[batch->end - 1] < keep_pfns); + + batch->total_pfns = keep_pfns; + batch->npfns[0] = keep_pfns; + batch->pfns[0] = batch->pfns[batch->end - 1] + + (batch->npfns[batch->end - 1] - keep_pfns); + batch->end = 0; +} + +static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns) +{ + if (!batch->total_pfns) + return; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(batch->total_pfns != batch->npfns[0]); + skip_pfns = min(batch->total_pfns, skip_pfns); + batch->pfns[0] += skip_pfns; + batch->npfns[0] -= skip_pfns; + batch->total_pfns -= skip_pfns; +} + +static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup, + size_t backup_len) +{ + const size_t elmsz = sizeof(*batch->pfns) + sizeof(*batch->npfns); + size_t size = max_pages * elmsz; + + batch->pfns = temp_kmalloc(&size, backup, backup_len); + if (!batch->pfns) + return -ENOMEM; + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(size < elmsz)) + return -EINVAL; + batch->array_size = size / elmsz; + batch->npfns = (u32 *)(batch->pfns + batch->array_size); + batch_clear(batch); + return 0; +} + +static int batch_init(struct pfn_batch *batch, size_t max_pages) +{ + return __batch_init(batch, max_pages, NULL, 0); +} + +static void batch_init_backup(struct pfn_batch *batch, size_t max_pages, + void *backup, size_t backup_len) +{ + __batch_init(batch, max_pages, backup, backup_len); +} + +static void batch_destroy(struct pfn_batch *batch, void *backup) +{ + if (batch->pfns != backup) + kfree(batch->pfns); +} + +/* true if the pfn was added, false otherwise */ +static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn) +{ + const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns)); + + if (batch->end && + pfn == batch->pfns[batch->end - 1] + batch->npfns[batch->end - 1] && + batch->npfns[batch->end - 1] != MAX_NPFNS) { + batch->npfns[batch->end - 1]++; + batch->total_pfns++; + return true; + } + if (batch->end == batch->array_size) + return false; + batch->total_pfns++; + batch->pfns[batch->end] = pfn; + batch->npfns[batch->end] = 1; + batch->end++; + return true; +} + +/* + * Fill the batch with pfns from the domain. When the batch is full, or it + * reaches last_index, the function will return. The caller should use + * batch->total_pfns to determine the starting point for the next iteration. + */ +static void batch_from_domain(struct pfn_batch *batch, + struct iommu_domain *domain, + struct iopt_area *area, unsigned long start_index, + unsigned long last_index) +{ + unsigned int page_offset = 0; + unsigned long iova; + phys_addr_t phys; + + iova = iopt_area_index_to_iova(area, start_index); + if (start_index == iopt_area_index(area)) + page_offset = area->page_offset; + while (start_index <= last_index) { + /* + * This is pretty slow, it would be nice to get the page size + * back from the driver, or have the driver directly fill the + * batch. + */ + phys = iommu_iova_to_phys(domain, iova) - page_offset; + if (!batch_add_pfn(batch, PHYS_PFN(phys))) + return; + iova += PAGE_SIZE - page_offset; + page_offset = 0; + start_index++; + } +} + +static struct page **raw_pages_from_domain(struct iommu_domain *domain, + struct iopt_area *area, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + unsigned int page_offset = 0; + unsigned long iova; + phys_addr_t phys; + + iova = iopt_area_index_to_iova(area, start_index); + if (start_index == iopt_area_index(area)) + page_offset = area->page_offset; + while (start_index <= last_index) { + phys = iommu_iova_to_phys(domain, iova) - page_offset; + *(out_pages++) = pfn_to_page(PHYS_PFN(phys)); + iova += PAGE_SIZE - page_offset; + page_offset = 0; + start_index++; + } + return out_pages; +} + +/* Continues reading a domain until we reach a discontinuity in the pfns. */ +static void batch_from_domain_continue(struct pfn_batch *batch, + struct iommu_domain *domain, + struct iopt_area *area, + unsigned long start_index, + unsigned long last_index) +{ + unsigned int array_size = batch->array_size; + + batch->array_size = batch->end; + batch_from_domain(batch, domain, area, start_index, last_index); + batch->array_size = array_size; +} + +/* + * This is part of the VFIO compatibility support for VFIO_TYPE1_IOMMU. That + * mode permits splitting a mapped area up, and then one of the splits is + * unmapped. Doing this normally would cause us to violate our invariant of + * pairing map/unmap. Thus, to support old VFIO compatibility disable support + * for batching consecutive PFNs. All PFNs mapped into the iommu are done in + * PAGE_SIZE units, not larger or smaller. + */ +static int batch_iommu_map_small(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + unsigned long start_iova = iova; + int rc; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) + WARN_ON(paddr % PAGE_SIZE || iova % PAGE_SIZE || + size % PAGE_SIZE); + + while (size) { + rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot); + if (rc) + goto err_unmap; + iova += PAGE_SIZE; + paddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + return 0; + +err_unmap: + if (start_iova != iova) + iommu_unmap_nofail(domain, start_iova, iova - start_iova); + return rc; +} + +static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain, + struct iopt_area *area, unsigned long start_index) +{ + bool disable_large_pages = area->iopt->disable_large_pages; + unsigned long last_iova = iopt_area_last_iova(area); + unsigned int page_offset = 0; + unsigned long start_iova; + unsigned long next_iova; + unsigned int cur = 0; + unsigned long iova; + int rc; + + /* The first index might be a partial page */ + if (start_index == iopt_area_index(area)) + page_offset = area->page_offset; + next_iova = iova = start_iova = + iopt_area_index_to_iova(area, start_index); + while (cur < batch->end) { + next_iova = min(last_iova + 1, + next_iova + batch->npfns[cur] * PAGE_SIZE - + page_offset); + if (disable_large_pages) + rc = batch_iommu_map_small( + domain, iova, + PFN_PHYS(batch->pfns[cur]) + page_offset, + next_iova - iova, area->iommu_prot); + else + rc = iommu_map(domain, iova, + PFN_PHYS(batch->pfns[cur]) + page_offset, + next_iova - iova, area->iommu_prot); + if (rc) + goto err_unmap; + iova = next_iova; + page_offset = 0; + cur++; + } + return 0; +err_unmap: + if (start_iova != iova) + iommu_unmap_nofail(domain, start_iova, iova - start_iova); + return rc; +} + +static void batch_from_xarray(struct pfn_batch *batch, struct xarray *xa, + unsigned long start_index, + unsigned long last_index) +{ + XA_STATE(xas, xa, start_index); + void *entry; + + rcu_read_lock(); + while (true) { + entry = xas_next(&xas); + if (xas_retry(&xas, entry)) + continue; + WARN_ON(!xa_is_value(entry)); + if (!batch_add_pfn(batch, xa_to_value(entry)) || + start_index == last_index) + break; + start_index++; + } + rcu_read_unlock(); +} + +static void batch_from_xarray_clear(struct pfn_batch *batch, struct xarray *xa, + unsigned long start_index, + unsigned long last_index) +{ + XA_STATE(xas, xa, start_index); + void *entry; + + xas_lock(&xas); + while (true) { + entry = xas_next(&xas); + if (xas_retry(&xas, entry)) + continue; + WARN_ON(!xa_is_value(entry)); + if (!batch_add_pfn(batch, xa_to_value(entry))) + break; + xas_store(&xas, NULL); + if (start_index == last_index) + break; + start_index++; + } + xas_unlock(&xas); +} + +static void clear_xarray(struct xarray *xa, unsigned long start_index, + unsigned long last_index) +{ + XA_STATE(xas, xa, start_index); + void *entry; + + xas_lock(&xas); + xas_for_each(&xas, entry, last_index) + xas_store(&xas, NULL); + xas_unlock(&xas); +} + +static int pages_to_xarray(struct xarray *xa, unsigned long start_index, + unsigned long last_index, struct page **pages) +{ + struct page **end_pages = pages + (last_index - start_index) + 1; + struct page **half_pages = pages + (end_pages - pages) / 2; + XA_STATE(xas, xa, start_index); + + do { + void *old; + + xas_lock(&xas); + while (pages != end_pages) { + /* xarray does not participate in fault injection */ + if (pages == half_pages && iommufd_should_fail()) { + xas_set_err(&xas, -EINVAL); + xas_unlock(&xas); + /* aka xas_destroy() */ + xas_nomem(&xas, GFP_KERNEL); + goto err_clear; + } + + old = xas_store(&xas, xa_mk_value(page_to_pfn(*pages))); + if (xas_error(&xas)) + break; + WARN_ON(old); + pages++; + xas_next(&xas); + } + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + +err_clear: + if (xas_error(&xas)) { + if (xas.xa_index != start_index) + clear_xarray(xa, start_index, xas.xa_index - 1); + return xas_error(&xas); + } + return 0; +} + +static void batch_from_pages(struct pfn_batch *batch, struct page **pages, + size_t npages) +{ + struct page **end = pages + npages; + + for (; pages != end; pages++) + if (!batch_add_pfn(batch, page_to_pfn(*pages))) + break; +} + +static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages, + unsigned int first_page_off, size_t npages) +{ + unsigned int cur = 0; + + while (first_page_off) { + if (batch->npfns[cur] > first_page_off) + break; + first_page_off -= batch->npfns[cur]; + cur++; + } + + while (npages) { + size_t to_unpin = min_t(size_t, npages, + batch->npfns[cur] - first_page_off); + + unpin_user_page_range_dirty_lock( + pfn_to_page(batch->pfns[cur] + first_page_off), + to_unpin, pages->writable); + iopt_pages_sub_npinned(pages, to_unpin); + cur++; + first_page_off = 0; + npages -= to_unpin; + } +} + +static void copy_data_page(struct page *page, void *data, unsigned long offset, + size_t length, unsigned int flags) +{ + void *mem; + + mem = kmap_local_page(page); + if (flags & IOMMUFD_ACCESS_RW_WRITE) { + memcpy(mem + offset, data, length); + set_page_dirty_lock(page); + } else { + memcpy(data, mem + offset, length); + } + kunmap_local(mem); +} + +static unsigned long batch_rw(struct pfn_batch *batch, void *data, + unsigned long offset, unsigned long length, + unsigned int flags) +{ + unsigned long copied = 0; + unsigned int npage = 0; + unsigned int cur = 0; + + while (cur < batch->end) { + unsigned long bytes = min(length, PAGE_SIZE - offset); + + copy_data_page(pfn_to_page(batch->pfns[cur] + npage), data, + offset, bytes, flags); + offset = 0; + length -= bytes; + data += bytes; + copied += bytes; + npage++; + if (npage == batch->npfns[cur]) { + npage = 0; + cur++; + } + if (!length) + break; + } + return copied; +} + +/* pfn_reader_user is just the pin_user_pages() path */ +struct pfn_reader_user { + struct page **upages; + size_t upages_len; + unsigned long upages_start; + unsigned long upages_end; + unsigned int gup_flags; + /* + * 1 means mmget() and mmap_read_lock(), 0 means only mmget(), -1 is + * neither + */ + int locked; +}; + +static void pfn_reader_user_init(struct pfn_reader_user *user, + struct iopt_pages *pages) +{ + user->upages = NULL; + user->upages_start = 0; + user->upages_end = 0; + user->locked = -1; + + user->gup_flags = FOLL_LONGTERM; + if (pages->writable) + user->gup_flags |= FOLL_WRITE; +} + +static void pfn_reader_user_destroy(struct pfn_reader_user *user, + struct iopt_pages *pages) +{ + if (user->locked != -1) { + if (user->locked) + mmap_read_unlock(pages->source_mm); + if (pages->source_mm != current->mm) + mmput(pages->source_mm); + user->locked = -1; + } + + kfree(user->upages); + user->upages = NULL; +} + +static int pfn_reader_user_pin(struct pfn_reader_user *user, + struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index) +{ + bool remote_mm = pages->source_mm != current->mm; + unsigned long npages; + uintptr_t uptr; + long rc; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(last_index < start_index)) + return -EINVAL; + + if (!user->upages) { + /* All undone in pfn_reader_destroy() */ + user->upages_len = + (last_index - start_index + 1) * sizeof(*user->upages); + user->upages = temp_kmalloc(&user->upages_len, NULL, 0); + if (!user->upages) + return -ENOMEM; + } + + if (user->locked == -1) { + /* + * The majority of usages will run the map task within the mm + * providing the pages, so we can optimize into + * get_user_pages_fast() + */ + if (remote_mm) { + if (!mmget_not_zero(pages->source_mm)) + return -EFAULT; + } + user->locked = 0; + } + + npages = min_t(unsigned long, last_index - start_index + 1, + user->upages_len / sizeof(*user->upages)); + + + if (iommufd_should_fail()) + return -EFAULT; + + uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE); + if (!remote_mm) + rc = pin_user_pages_fast(uptr, npages, user->gup_flags, + user->upages); + else { + if (!user->locked) { + mmap_read_lock(pages->source_mm); + user->locked = 1; + } + rc = pin_user_pages_remote(pages->source_mm, uptr, npages, + user->gup_flags, user->upages, NULL, + &user->locked); + } + if (rc <= 0) { + if (WARN_ON(!rc)) + return -EFAULT; + return rc; + } + iopt_pages_add_npinned(pages, rc); + user->upages_start = start_index; + user->upages_end = start_index + rc; + return 0; +} + +/* This is the "modern" and faster accounting method used by io_uring */ +static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) +{ + unsigned long lock_limit; + unsigned long cur_pages; + unsigned long new_pages; + + lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >> + PAGE_SHIFT; + do { + cur_pages = atomic_long_read(&pages->source_user->locked_vm); + new_pages = cur_pages + npages; + if (new_pages > lock_limit) + return -ENOMEM; + } while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages, + new_pages) != cur_pages); + return 0; +} + +static void decr_user_locked_vm(struct iopt_pages *pages, unsigned long npages) +{ + if (WARN_ON(atomic_long_read(&pages->source_user->locked_vm) < npages)) + return; + atomic_long_sub(npages, &pages->source_user->locked_vm); +} + +/* This is the accounting method used for compatibility with VFIO */ +static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages, + bool inc, struct pfn_reader_user *user) +{ + bool do_put = false; + int rc; + + if (user && user->locked) { + mmap_read_unlock(pages->source_mm); + user->locked = 0; + /* If we had the lock then we also have a get */ + } else if ((!user || !user->upages) && + pages->source_mm != current->mm) { + if (!mmget_not_zero(pages->source_mm)) + return -EINVAL; + do_put = true; + } + + mmap_write_lock(pages->source_mm); + rc = __account_locked_vm(pages->source_mm, npages, inc, + pages->source_task, false); + mmap_write_unlock(pages->source_mm); + + if (do_put) + mmput(pages->source_mm); + return rc; +} + +static int do_update_pinned(struct iopt_pages *pages, unsigned long npages, + bool inc, struct pfn_reader_user *user) +{ + int rc = 0; + + switch (pages->account_mode) { + case IOPT_PAGES_ACCOUNT_NONE: + break; + case IOPT_PAGES_ACCOUNT_USER: + if (inc) + rc = incr_user_locked_vm(pages, npages); + else + decr_user_locked_vm(pages, npages); + break; + case IOPT_PAGES_ACCOUNT_MM: + rc = update_mm_locked_vm(pages, npages, inc, user); + break; + } + if (rc) + return rc; + + pages->last_npinned = pages->npinned; + if (inc) + atomic64_add(npages, &pages->source_mm->pinned_vm); + else + atomic64_sub(npages, &pages->source_mm->pinned_vm); + return 0; +} + +static void update_unpinned(struct iopt_pages *pages) +{ + if (WARN_ON(pages->npinned > pages->last_npinned)) + return; + if (pages->npinned == pages->last_npinned) + return; + do_update_pinned(pages, pages->last_npinned - pages->npinned, false, + NULL); +} + +/* + * Changes in the number of pages pinned is done after the pages have been read + * and processed. If the user lacked the limit then the error unwind will unpin + * everything that was just pinned. This is because it is expensive to calculate + * how many pages we have already pinned within a range to generate an accurate + * prediction in advance of doing the work to actually pin them. + */ +static int pfn_reader_user_update_pinned(struct pfn_reader_user *user, + struct iopt_pages *pages) +{ + unsigned long npages; + bool inc; + + lockdep_assert_held(&pages->mutex); + + if (pages->npinned == pages->last_npinned) + return 0; + + if (pages->npinned < pages->last_npinned) { + npages = pages->last_npinned - pages->npinned; + inc = false; + } else { + if (iommufd_should_fail()) + return -ENOMEM; + npages = pages->npinned - pages->last_npinned; + inc = true; + } + return do_update_pinned(pages, npages, inc, user); +} + +/* + * PFNs are stored in three places, in order of preference: + * - The iopt_pages xarray. This is only populated if there is a + * iopt_pages_access + * - The iommu_domain under an area + * - The original PFN source, ie pages->source_mm + * + * This iterator reads the pfns optimizing to load according to the + * above order. + */ +struct pfn_reader { + struct iopt_pages *pages; + struct interval_tree_double_span_iter span; + struct pfn_batch batch; + unsigned long batch_start_index; + unsigned long batch_end_index; + unsigned long last_index; + + struct pfn_reader_user user; +}; + +static int pfn_reader_update_pinned(struct pfn_reader *pfns) +{ + return pfn_reader_user_update_pinned(&pfns->user, pfns->pages); +} + +/* + * The batch can contain a mixture of pages that are still in use and pages that + * need to be unpinned. Unpin only pages that are not held anywhere else. + */ +static void pfn_reader_unpin(struct pfn_reader *pfns) +{ + unsigned long last = pfns->batch_end_index - 1; + unsigned long start = pfns->batch_start_index; + struct interval_tree_double_span_iter span; + struct iopt_pages *pages = pfns->pages; + + lockdep_assert_held(&pages->mutex); + + interval_tree_for_each_double_span(&span, &pages->access_itree, + &pages->domains_itree, start, last) { + if (span.is_used) + continue; + + batch_unpin(&pfns->batch, pages, span.start_hole - start, + span.last_hole - span.start_hole + 1); + } +} + +/* Process a single span to load it from the proper storage */ +static int pfn_reader_fill_span(struct pfn_reader *pfns) +{ + struct interval_tree_double_span_iter *span = &pfns->span; + unsigned long start_index = pfns->batch_end_index; + struct iopt_area *area; + int rc; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(span->last_used < start_index)) + return -EINVAL; + + if (span->is_used == 1) { + batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns, + start_index, span->last_used); + return 0; + } + + if (span->is_used == 2) { + /* + * Pull as many pages from the first domain we find in the + * target span. If it is too small then we will be called again + * and we'll find another area. + */ + area = iopt_pages_find_domain_area(pfns->pages, start_index); + if (WARN_ON(!area)) + return -EINVAL; + + /* The storage_domain cannot change without the pages mutex */ + batch_from_domain( + &pfns->batch, area->storage_domain, area, start_index, + min(iopt_area_last_index(area), span->last_used)); + return 0; + } + + if (start_index >= pfns->user.upages_end) { + rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index, + span->last_hole); + if (rc) + return rc; + } + + batch_from_pages(&pfns->batch, + pfns->user.upages + + (start_index - pfns->user.upages_start), + pfns->user.upages_end - start_index); + return 0; +} + +static bool pfn_reader_done(struct pfn_reader *pfns) +{ + return pfns->batch_start_index == pfns->last_index + 1; +} + +static int pfn_reader_next(struct pfn_reader *pfns) +{ + int rc; + + batch_clear(&pfns->batch); + pfns->batch_start_index = pfns->batch_end_index; + + while (pfns->batch_end_index != pfns->last_index + 1) { + unsigned int npfns = pfns->batch.total_pfns; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(interval_tree_double_span_iter_done(&pfns->span))) + return -EINVAL; + + rc = pfn_reader_fill_span(pfns); + if (rc) + return rc; + + if (WARN_ON(!pfns->batch.total_pfns)) + return -EINVAL; + + pfns->batch_end_index = + pfns->batch_start_index + pfns->batch.total_pfns; + if (pfns->batch_end_index == pfns->span.last_used + 1) + interval_tree_double_span_iter_next(&pfns->span); + + /* Batch is full */ + if (npfns == pfns->batch.total_pfns) + return 0; + } + return 0; +} + +static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages, + unsigned long start_index, unsigned long last_index) +{ + int rc; + + lockdep_assert_held(&pages->mutex); + + pfns->pages = pages; + pfns->batch_start_index = start_index; + pfns->batch_end_index = start_index; + pfns->last_index = last_index; + pfn_reader_user_init(&pfns->user, pages); + rc = batch_init(&pfns->batch, last_index - start_index + 1); + if (rc) + return rc; + interval_tree_double_span_iter_first(&pfns->span, &pages->access_itree, + &pages->domains_itree, start_index, + last_index); + return 0; +} + +/* + * There are many assertions regarding the state of pages->npinned vs + * pages->last_pinned, for instance something like unmapping a domain must only + * decrement the npinned, and pfn_reader_destroy() must be called only after all + * the pins are updated. This is fine for success flows, but error flows + * sometimes need to release the pins held inside the pfn_reader before going on + * to complete unmapping and releasing pins held in domains. + */ +static void pfn_reader_release_pins(struct pfn_reader *pfns) +{ + struct iopt_pages *pages = pfns->pages; + + if (pfns->user.upages_end > pfns->batch_end_index) { + size_t npages = pfns->user.upages_end - pfns->batch_end_index; + + /* Any pages not transferred to the batch are just unpinned */ + unpin_user_pages(pfns->user.upages + (pfns->batch_end_index - + pfns->user.upages_start), + npages); + iopt_pages_sub_npinned(pages, npages); + pfns->user.upages_end = pfns->batch_end_index; + } + if (pfns->batch_start_index != pfns->batch_end_index) { + pfn_reader_unpin(pfns); + pfns->batch_start_index = pfns->batch_end_index; + } +} + +static void pfn_reader_destroy(struct pfn_reader *pfns) +{ + struct iopt_pages *pages = pfns->pages; + + pfn_reader_release_pins(pfns); + pfn_reader_user_destroy(&pfns->user, pfns->pages); + batch_destroy(&pfns->batch, NULL); + WARN_ON(pages->last_npinned != pages->npinned); +} + +static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages, + unsigned long start_index, unsigned long last_index) +{ + int rc; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + WARN_ON(last_index < start_index)) + return -EINVAL; + + rc = pfn_reader_init(pfns, pages, start_index, last_index); + if (rc) + return rc; + rc = pfn_reader_next(pfns); + if (rc) { + pfn_reader_destroy(pfns); + return rc; + } + return 0; +} + +struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length, + bool writable) +{ + struct iopt_pages *pages; + + /* + * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP + * below from overflow + */ + if (length > SIZE_MAX - PAGE_SIZE || length == 0) + return ERR_PTR(-EINVAL); + + pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT); + if (!pages) + return ERR_PTR(-ENOMEM); + + kref_init(&pages->kref); + xa_init_flags(&pages->pinned_pfns, XA_FLAGS_ACCOUNT); + mutex_init(&pages->mutex); + pages->source_mm = current->mm; + mmgrab(pages->source_mm); + pages->uptr = (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE); + pages->npages = DIV_ROUND_UP(length + (uptr - pages->uptr), PAGE_SIZE); + pages->access_itree = RB_ROOT_CACHED; + pages->domains_itree = RB_ROOT_CACHED; + pages->writable = writable; + if (capable(CAP_IPC_LOCK)) + pages->account_mode = IOPT_PAGES_ACCOUNT_NONE; + else + pages->account_mode = IOPT_PAGES_ACCOUNT_USER; + pages->source_task = current->group_leader; + get_task_struct(current->group_leader); + pages->source_user = get_uid(current_user()); + return pages; +} + +void iopt_release_pages(struct kref *kref) +{ + struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref); + + WARN_ON(!RB_EMPTY_ROOT(&pages->access_itree.rb_root)); + WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root)); + WARN_ON(pages->npinned); + WARN_ON(!xa_empty(&pages->pinned_pfns)); + mmdrop(pages->source_mm); + mutex_destroy(&pages->mutex); + put_task_struct(pages->source_task); + free_uid(pages->source_user); + kfree(pages); +} + +static void +iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area, + struct iopt_pages *pages, struct iommu_domain *domain, + unsigned long start_index, unsigned long last_index, + unsigned long *unmapped_end_index, + unsigned long real_last_index) +{ + while (start_index <= last_index) { + unsigned long batch_last_index; + + if (*unmapped_end_index <= last_index) { + unsigned long start = + max(start_index, *unmapped_end_index); + + batch_from_domain(batch, domain, area, start, + last_index); + batch_last_index = start + batch->total_pfns - 1; + } else { + batch_last_index = last_index; + } + + /* + * unmaps must always 'cut' at a place where the pfns are not + * contiguous to pair with the maps that always install + * contiguous pages. Thus, if we have to stop unpinning in the + * middle of the domains we need to keep reading pfns until we + * find a cut point to do the unmap. The pfns we read are + * carried over and either skipped or integrated into the next + * batch. + */ + if (batch_last_index == last_index && + last_index != real_last_index) + batch_from_domain_continue(batch, domain, area, + last_index + 1, + real_last_index); + + if (*unmapped_end_index <= batch_last_index) { + iopt_area_unmap_domain_range( + area, domain, *unmapped_end_index, + start_index + batch->total_pfns - 1); + *unmapped_end_index = start_index + batch->total_pfns; + } + + /* unpin must follow unmap */ + batch_unpin(batch, pages, 0, + batch_last_index - start_index + 1); + start_index = batch_last_index + 1; + + batch_clear_carry(batch, + *unmapped_end_index - batch_last_index - 1); + } +} + +static void __iopt_area_unfill_domain(struct iopt_area *area, + struct iopt_pages *pages, + struct iommu_domain *domain, + unsigned long last_index) +{ + struct interval_tree_double_span_iter span; + unsigned long start_index = iopt_area_index(area); + unsigned long unmapped_end_index = start_index; + u64 backup[BATCH_BACKUP_SIZE]; + struct pfn_batch batch; + + lockdep_assert_held(&pages->mutex); + + /* + * For security we must not unpin something that is still DMA mapped, + * so this must unmap any IOVA before we go ahead and unpin the pages. + * This creates a complexity where we need to skip over unpinning pages + * held in the xarray, but continue to unmap from the domain. + * + * The domain unmap cannot stop in the middle of a contiguous range of + * PFNs. To solve this problem the unpinning step will read ahead to the + * end of any contiguous span, unmap that whole span, and then only + * unpin the leading part that does not have any accesses. The residual + * PFNs that were unmapped but not unpinned are called a "carry" in the + * batch as they are moved to the front of the PFN list and continue on + * to the next iteration(s). + */ + batch_init_backup(&batch, last_index + 1, backup, sizeof(backup)); + interval_tree_for_each_double_span(&span, &pages->domains_itree, + &pages->access_itree, start_index, + last_index) { + if (span.is_used) { + batch_skip_carry(&batch, + span.last_used - span.start_used + 1); + continue; + } + iopt_area_unpin_domain(&batch, area, pages, domain, + span.start_hole, span.last_hole, + &unmapped_end_index, last_index); + } + /* + * If the range ends in a access then we do the residual unmap without + * any unpins. + */ + if (unmapped_end_index != last_index + 1) + iopt_area_unmap_domain_range(area, domain, unmapped_end_index, + last_index); + WARN_ON(batch.total_pfns); + batch_destroy(&batch, backup); + update_unpinned(pages); +} + +static void iopt_area_unfill_partial_domain(struct iopt_area *area, + struct iopt_pages *pages, + struct iommu_domain *domain, + unsigned long end_index) +{ + if (end_index != iopt_area_index(area)) + __iopt_area_unfill_domain(area, pages, domain, end_index - 1); +} + +/** + * iopt_area_unmap_domain() - Unmap without unpinning PFNs in a domain + * @area: The IOVA range to unmap + * @domain: The domain to unmap + * + * The caller must know that unpinning is not required, usually because there + * are other domains in the iopt. + */ +void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain) +{ + iommu_unmap_nofail(domain, iopt_area_iova(area), + iopt_area_length(area)); +} + +/** + * iopt_area_unfill_domain() - Unmap and unpin PFNs in a domain + * @area: IOVA area to use + * @pages: page supplier for the area (area->pages is NULL) + * @domain: Domain to unmap from + * + * The domain should be removed from the domains_itree before calling. The + * domain will always be unmapped, but the PFNs may not be unpinned if there are + * still accesses. + */ +void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages, + struct iommu_domain *domain) +{ + __iopt_area_unfill_domain(area, pages, domain, + iopt_area_last_index(area)); +} + +/** + * iopt_area_fill_domain() - Map PFNs from the area into a domain + * @area: IOVA area to use + * @domain: Domain to load PFNs into + * + * Read the pfns from the area's underlying iopt_pages and map them into the + * given domain. Called when attaching a new domain to an io_pagetable. + */ +int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain) +{ + unsigned long done_end_index; + struct pfn_reader pfns; + int rc; + + lockdep_assert_held(&area->pages->mutex); + + rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area), + iopt_area_last_index(area)); + if (rc) + return rc; + + while (!pfn_reader_done(&pfns)) { + done_end_index = pfns.batch_start_index; + rc = batch_to_domain(&pfns.batch, domain, area, + pfns.batch_start_index); + if (rc) + goto out_unmap; + done_end_index = pfns.batch_end_index; + + rc = pfn_reader_next(&pfns); + if (rc) + goto out_unmap; + } + + rc = pfn_reader_update_pinned(&pfns); + if (rc) + goto out_unmap; + goto out_destroy; + +out_unmap: + pfn_reader_release_pins(&pfns); + iopt_area_unfill_partial_domain(area, area->pages, domain, + done_end_index); +out_destroy: + pfn_reader_destroy(&pfns); + return rc; +} + +/** + * iopt_area_fill_domains() - Install PFNs into the area's domains + * @area: The area to act on + * @pages: The pages associated with the area (area->pages is NULL) + * + * Called during area creation. The area is freshly created and not inserted in + * the domains_itree yet. PFNs are read and loaded into every domain held in the + * area's io_pagetable and the area is installed in the domains_itree. + * + * On failure all domains are left unchanged. + */ +int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages) +{ + unsigned long done_first_end_index; + unsigned long done_all_end_index; + struct iommu_domain *domain; + unsigned long unmap_index; + struct pfn_reader pfns; + unsigned long index; + int rc; + + lockdep_assert_held(&area->iopt->domains_rwsem); + + if (xa_empty(&area->iopt->domains)) + return 0; + + mutex_lock(&pages->mutex); + rc = pfn_reader_first(&pfns, pages, iopt_area_index(area), + iopt_area_last_index(area)); + if (rc) + goto out_unlock; + + while (!pfn_reader_done(&pfns)) { + done_first_end_index = pfns.batch_end_index; + done_all_end_index = pfns.batch_start_index; + xa_for_each(&area->iopt->domains, index, domain) { + rc = batch_to_domain(&pfns.batch, domain, area, + pfns.batch_start_index); + if (rc) + goto out_unmap; + } + done_all_end_index = done_first_end_index; + + rc = pfn_reader_next(&pfns); + if (rc) + goto out_unmap; + } + rc = pfn_reader_update_pinned(&pfns); + if (rc) + goto out_unmap; + + area->storage_domain = xa_load(&area->iopt->domains, 0); + interval_tree_insert(&area->pages_node, &pages->domains_itree); + goto out_destroy; + +out_unmap: + pfn_reader_release_pins(&pfns); + xa_for_each(&area->iopt->domains, unmap_index, domain) { + unsigned long end_index; + + if (unmap_index < index) + end_index = done_first_end_index; + else + end_index = done_all_end_index; + + /* + * The area is not yet part of the domains_itree so we have to + * manage the unpinning specially. The last domain does the + * unpin, every other domain is just unmapped. + */ + if (unmap_index != area->iopt->next_domain_id - 1) { + if (end_index != iopt_area_index(area)) + iopt_area_unmap_domain_range( + area, domain, iopt_area_index(area), + end_index - 1); + } else { + iopt_area_unfill_partial_domain(area, pages, domain, + end_index); + } + } +out_destroy: + pfn_reader_destroy(&pfns); +out_unlock: + mutex_unlock(&pages->mutex); + return rc; +} + +/** + * iopt_area_unfill_domains() - unmap PFNs from the area's domains + * @area: The area to act on + * @pages: The pages associated with the area (area->pages is NULL) + * + * Called during area destruction. This unmaps the iova's covered by all the + * area's domains and releases the PFNs. + */ +void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages) +{ + struct io_pagetable *iopt = area->iopt; + struct iommu_domain *domain; + unsigned long index; + + lockdep_assert_held(&iopt->domains_rwsem); + + mutex_lock(&pages->mutex); + if (!area->storage_domain) + goto out_unlock; + + xa_for_each(&iopt->domains, index, domain) + if (domain != area->storage_domain) + iopt_area_unmap_domain_range( + area, domain, iopt_area_index(area), + iopt_area_last_index(area)); + + interval_tree_remove(&area->pages_node, &pages->domains_itree); + iopt_area_unfill_domain(area, pages, area->storage_domain); + area->storage_domain = NULL; +out_unlock: + mutex_unlock(&pages->mutex); +} + +static void iopt_pages_unpin_xarray(struct pfn_batch *batch, + struct iopt_pages *pages, + unsigned long start_index, + unsigned long end_index) +{ + while (start_index <= end_index) { + batch_from_xarray_clear(batch, &pages->pinned_pfns, start_index, + end_index); + batch_unpin(batch, pages, 0, batch->total_pfns); + start_index += batch->total_pfns; + batch_clear(batch); + } +} + +/** + * iopt_pages_unfill_xarray() - Update the xarry after removing an access + * @pages: The pages to act on + * @start_index: Starting PFN index + * @last_index: Last PFN index + * + * Called when an iopt_pages_access is removed, removes pages from the itree. + * The access should already be removed from the access_itree. + */ +void iopt_pages_unfill_xarray(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index) +{ + struct interval_tree_double_span_iter span; + u64 backup[BATCH_BACKUP_SIZE]; + struct pfn_batch batch; + bool batch_inited = false; + + lockdep_assert_held(&pages->mutex); + + interval_tree_for_each_double_span(&span, &pages->access_itree, + &pages->domains_itree, start_index, + last_index) { + if (!span.is_used) { + if (!batch_inited) { + batch_init_backup(&batch, + last_index - start_index + 1, + backup, sizeof(backup)); + batch_inited = true; + } + iopt_pages_unpin_xarray(&batch, pages, span.start_hole, + span.last_hole); + } else if (span.is_used == 2) { + /* Covered by a domain */ + clear_xarray(&pages->pinned_pfns, span.start_used, + span.last_used); + } + /* Otherwise covered by an existing access */ + } + if (batch_inited) + batch_destroy(&batch, backup); + update_unpinned(pages); +} + +/** + * iopt_pages_fill_from_xarray() - Fast path for reading PFNs + * @pages: The pages to act on + * @start_index: The first page index in the range + * @last_index: The last page index in the range + * @out_pages: The output array to return the pages + * + * This can be called if the caller is holding a refcount on an + * iopt_pages_access that is known to have already been filled. It quickly reads + * the pages directly from the xarray. + * + * This is part of the SW iommu interface to read pages for in-kernel use. + */ +void iopt_pages_fill_from_xarray(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + XA_STATE(xas, &pages->pinned_pfns, start_index); + void *entry; + + rcu_read_lock(); + while (start_index <= last_index) { + entry = xas_next(&xas); + if (xas_retry(&xas, entry)) + continue; + WARN_ON(!xa_is_value(entry)); + *(out_pages++) = pfn_to_page(xa_to_value(entry)); + start_index++; + } + rcu_read_unlock(); +} + +static int iopt_pages_fill_from_domain(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + while (start_index != last_index + 1) { + unsigned long domain_last; + struct iopt_area *area; + + area = iopt_pages_find_domain_area(pages, start_index); + if (WARN_ON(!area)) + return -EINVAL; + + domain_last = min(iopt_area_last_index(area), last_index); + out_pages = raw_pages_from_domain(area->storage_domain, area, + start_index, domain_last, + out_pages); + start_index = domain_last + 1; + } + return 0; +} + +static int iopt_pages_fill_from_mm(struct iopt_pages *pages, + struct pfn_reader_user *user, + unsigned long start_index, + unsigned long last_index, + struct page **out_pages) +{ + unsigned long cur_index = start_index; + int rc; + + while (cur_index != last_index + 1) { + user->upages = out_pages + (cur_index - start_index); + rc = pfn_reader_user_pin(user, pages, cur_index, last_index); + if (rc) + goto out_unpin; + cur_index = user->upages_end; + } + return 0; + +out_unpin: + if (start_index != cur_index) + iopt_pages_err_unpin(pages, start_index, cur_index - 1, + out_pages); + return rc; +} + +/** + * iopt_pages_fill_xarray() - Read PFNs + * @pages: The pages to act on + * @start_index: The first page index in the range + * @last_index: The last page index in the range + * @out_pages: The output array to return the pages, may be NULL + * + * This populates the xarray and returns the pages in out_pages. As the slow + * path this is able to copy pages from other storage tiers into the xarray. + * + * On failure the xarray is left unchanged. + * + * This is part of the SW iommu interface to read pages for in-kernel use. + */ +int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index, + unsigned long last_index, struct page **out_pages) +{ + struct interval_tree_double_span_iter span; + unsigned long xa_end = start_index; + struct pfn_reader_user user; + int rc; + + lockdep_assert_held(&pages->mutex); + + pfn_reader_user_init(&user, pages); + user.upages_len = (last_index - start_index + 1) * sizeof(*out_pages); + interval_tree_for_each_double_span(&span, &pages->access_itree, + &pages->domains_itree, start_index, + last_index) { + struct page **cur_pages; + + if (span.is_used == 1) { + cur_pages = out_pages + (span.start_used - start_index); + iopt_pages_fill_from_xarray(pages, span.start_used, + span.last_used, cur_pages); + continue; + } + + if (span.is_used == 2) { + cur_pages = out_pages + (span.start_used - start_index); + iopt_pages_fill_from_domain(pages, span.start_used, + span.last_used, cur_pages); + rc = pages_to_xarray(&pages->pinned_pfns, + span.start_used, span.last_used, + cur_pages); + if (rc) + goto out_clean_xa; + xa_end = span.last_used + 1; + continue; + } + + /* hole */ + cur_pages = out_pages + (span.start_hole - start_index); + rc = iopt_pages_fill_from_mm(pages, &user, span.start_hole, + span.last_hole, cur_pages); + if (rc) + goto out_clean_xa; + rc = pages_to_xarray(&pages->pinned_pfns, span.start_hole, + span.last_hole, cur_pages); + if (rc) { + iopt_pages_err_unpin(pages, span.start_hole, + span.last_hole, cur_pages); + goto out_clean_xa; + } + xa_end = span.last_hole + 1; + } + rc = pfn_reader_user_update_pinned(&user, pages); + if (rc) + goto out_clean_xa; + user.upages = NULL; + pfn_reader_user_destroy(&user, pages); + return 0; + +out_clean_xa: + if (start_index != xa_end) + iopt_pages_unfill_xarray(pages, start_index, xa_end - 1); + user.upages = NULL; + pfn_reader_user_destroy(&user, pages); + return rc; +} + +/* + * This uses the pfn_reader instead of taking a shortcut by using the mm. It can + * do every scenario and is fully consistent with what an iommu_domain would + * see. + */ +static int iopt_pages_rw_slow(struct iopt_pages *pages, + unsigned long start_index, + unsigned long last_index, unsigned long offset, + void *data, unsigned long length, + unsigned int flags) +{ + struct pfn_reader pfns; + int rc; + + mutex_lock(&pages->mutex); + + rc = pfn_reader_first(&pfns, pages, start_index, last_index); + if (rc) + goto out_unlock; + + while (!pfn_reader_done(&pfns)) { + unsigned long done; + + done = batch_rw(&pfns.batch, data, offset, length, flags); + data += done; + length -= done; + offset = 0; + pfn_reader_unpin(&pfns); + + rc = pfn_reader_next(&pfns); + if (rc) + goto out_destroy; + } + if (WARN_ON(length != 0)) + rc = -EINVAL; +out_destroy: + pfn_reader_destroy(&pfns); +out_unlock: + mutex_unlock(&pages->mutex); + return rc; +} + +/* + * A medium speed path that still allows DMA inconsistencies, but doesn't do any + * memory allocations or interval tree searches. + */ +static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index, + unsigned long offset, void *data, + unsigned long length, unsigned int flags) +{ + struct page *page = NULL; + int rc; + + if (!mmget_not_zero(pages->source_mm)) + return iopt_pages_rw_slow(pages, index, index, offset, data, + length, flags); + + if (iommufd_should_fail()) { + rc = -EINVAL; + goto out_mmput; + } + + mmap_read_lock(pages->source_mm); + rc = pin_user_pages_remote( + pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE), + 1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? FOLL_WRITE : 0, &page, + NULL, NULL); + mmap_read_unlock(pages->source_mm); + if (rc != 1) { + if (WARN_ON(rc >= 0)) + rc = -EINVAL; + goto out_mmput; + } + copy_data_page(page, data, offset, length, flags); + unpin_user_page(page); + rc = 0; + +out_mmput: + mmput(pages->source_mm); + return rc; +} + +/** + * iopt_pages_rw_access - Copy to/from a linear slice of the pages + * @pages: pages to act on + * @start_byte: First byte of pages to copy to/from + * @data: Kernel buffer to get/put the data + * @length: Number of bytes to copy + * @flags: IOMMUFD_ACCESS_RW_* flags + * + * This will find each page in the range, kmap it and then memcpy to/from + * the given kernel buffer. + */ +int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte, + void *data, unsigned long length, unsigned int flags) +{ + unsigned long start_index = start_byte / PAGE_SIZE; + unsigned long last_index = (start_byte + length - 1) / PAGE_SIZE; + bool change_mm = current->mm != pages->source_mm; + int rc = 0; + + if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && + (flags & __IOMMUFD_ACCESS_RW_SLOW_PATH)) + change_mm = true; + + if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) + return -EPERM; + + if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) { + if (start_index == last_index) + return iopt_pages_rw_page(pages, start_index, + start_byte % PAGE_SIZE, data, + length, flags); + return iopt_pages_rw_slow(pages, start_index, last_index, + start_byte % PAGE_SIZE, data, length, + flags); + } + + /* + * Try to copy using copy_to_user(). We do this as a fast path and + * ignore any pinning inconsistencies, unlike a real DMA path. + */ + if (change_mm) { + if (!mmget_not_zero(pages->source_mm)) + return iopt_pages_rw_slow(pages, start_index, + last_index, + start_byte % PAGE_SIZE, data, + length, flags); + kthread_use_mm(pages->source_mm); + } + + if (flags & IOMMUFD_ACCESS_RW_WRITE) { + if (copy_to_user(pages->uptr + start_byte, data, length)) + rc = -EFAULT; + } else { + if (copy_from_user(data, pages->uptr + start_byte, length)) + rc = -EFAULT; + } + + if (change_mm) { + kthread_unuse_mm(pages->source_mm); + mmput(pages->source_mm); + } + + return rc; +} + +static struct iopt_pages_access * +iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index, + unsigned long last) +{ + struct interval_tree_node *node; + + lockdep_assert_held(&pages->mutex); + + /* There can be overlapping ranges in this interval tree */ + for (node = interval_tree_iter_first(&pages->access_itree, index, last); + node; node = interval_tree_iter_next(node, index, last)) + if (node->start == index && node->last == last) + return container_of(node, struct iopt_pages_access, + node); + return NULL; +} + +/** + * iopt_area_add_access() - Record an in-knerel access for PFNs + * @area: The source of PFNs + * @start_index: First page index + * @last_index: Inclusive last page index + * @out_pages: Output list of struct page's representing the PFNs + * @flags: IOMMUFD_ACCESS_RW_* flags + * + * Record that an in-kernel access will be accessing the pages, ensure they are + * pinned, and return the PFNs as a simple list of 'struct page *'. + * + * This should be undone through a matching call to iopt_area_remove_access() + */ +int iopt_area_add_access(struct iopt_area *area, unsigned long start_index, + unsigned long last_index, struct page **out_pages, + unsigned int flags) +{ + struct iopt_pages *pages = area->pages; + struct iopt_pages_access *access; + int rc; + + if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable) + return -EPERM; + + mutex_lock(&pages->mutex); + access = iopt_pages_get_exact_access(pages, start_index, last_index); + if (access) { + area->num_accesses++; + access->users++; + iopt_pages_fill_from_xarray(pages, start_index, last_index, + out_pages); + mutex_unlock(&pages->mutex); + return 0; + } + + access = kzalloc(sizeof(*access), GFP_KERNEL_ACCOUNT); + if (!access) { + rc = -ENOMEM; + goto err_unlock; + } + + rc = iopt_pages_fill_xarray(pages, start_index, last_index, out_pages); + if (rc) + goto err_free; + + access->node.start = start_index; + access->node.last = last_index; + access->users = 1; + area->num_accesses++; + interval_tree_insert(&access->node, &pages->access_itree); + mutex_unlock(&pages->mutex); + return 0; + +err_free: + kfree(access); +err_unlock: + mutex_unlock(&pages->mutex); + return rc; +} + +/** + * iopt_area_remove_access() - Release an in-kernel access for PFNs + * @area: The source of PFNs + * @start_index: First page index + * @last_index: Inclusive last page index + * + * Undo iopt_area_add_access() and unpin the pages if necessary. The caller + * must stop using the PFNs before calling this. + */ +void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index, + unsigned long last_index) +{ + struct iopt_pages *pages = area->pages; + struct iopt_pages_access *access; + + mutex_lock(&pages->mutex); + access = iopt_pages_get_exact_access(pages, start_index, last_index); + if (WARN_ON(!access)) + goto out_unlock; + + WARN_ON(area->num_accesses == 0 || access->users == 0); + area->num_accesses--; + access->users--; + if (access->users) + goto out_unlock; + + interval_tree_remove(&access->node, &pages->access_itree); + iopt_pages_unfill_xarray(pages, start_index, last_index); + kfree(access); +out_unlock: + mutex_unlock(&pages->mutex); +} diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c new file mode 100644 index 0000000000000000000000000000000000000000..cfb5fe9a5e0ee8664518158a7176c909ee8a9c06 --- /dev/null +++ b/drivers/iommu/iommufd/selftest.c @@ -0,0 +1,853 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES. + * + * Kernel side components to support tools/testing/selftests/iommu + */ +#include +#include +#include +#include +#include +#include +#include + +#include "io_pagetable.h" +#include "iommufd_private.h" +#include "iommufd_test.h" + +static DECLARE_FAULT_ATTR(fail_iommufd); +static struct dentry *dbgfs_root; + +size_t iommufd_test_memory_limit = 65536; + +enum { + MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2, + + /* + * Like a real page table alignment requires the low bits of the address + * to be zero. xarray also requires the high bit to be zero, so we store + * the pfns shifted. The upper bits are used for metadata. + */ + MOCK_PFN_MASK = ULONG_MAX / MOCK_IO_PAGE_SIZE, + + _MOCK_PFN_START = MOCK_PFN_MASK + 1, + MOCK_PFN_START_IOVA = _MOCK_PFN_START, + MOCK_PFN_LAST_IOVA = _MOCK_PFN_START, +}; + +/* + * Syzkaller has trouble randomizing the correct iova to use since it is linked + * to the map ioctl's output, and it has no ide about that. So, simplify things. + * In syzkaller mode the 64 bit IOVA is converted into an nth area and offset + * value. This has a much smaller randomization space and syzkaller can hit it. + */ +static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt, + u64 *iova) +{ + struct syz_layout { + __u32 nth_area; + __u32 offset; + }; + struct syz_layout *syz = (void *)iova; + unsigned int nth = syz->nth_area; + struct iopt_area *area; + + down_read(&iopt->iova_rwsem); + for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area; + area = iopt_area_iter_next(area, 0, ULONG_MAX)) { + if (nth == 0) { + up_read(&iopt->iova_rwsem); + return iopt_area_iova(area) + syz->offset; + } + nth--; + } + up_read(&iopt->iova_rwsem); + + return 0; +} + +void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, u64 *iova, u32 *flags) +{ + struct iommufd_ioas *ioas; + + if (!(*flags & MOCK_FLAGS_ACCESS_SYZ)) + return; + *flags &= ~(u32)MOCK_FLAGS_ACCESS_SYZ; + + ioas = iommufd_get_ioas(ucmd, ioas_id); + if (IS_ERR(ioas)) + return; + *iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova); + iommufd_put_object(&ioas->obj); +} + +struct mock_iommu_domain { + struct iommu_domain domain; + struct xarray pfns; +}; + +enum selftest_obj_type { + TYPE_IDEV, +}; + +struct selftest_obj { + struct iommufd_object obj; + enum selftest_obj_type type; + + union { + struct { + struct iommufd_hw_pagetable *hwpt; + struct iommufd_ctx *ictx; + struct device mock_dev; + } idev; + }; +}; + +static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type) +{ + struct mock_iommu_domain *mock; + + if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED)) + return NULL; + + mock = kzalloc(sizeof(*mock), GFP_KERNEL); + if (!mock) + return NULL; + mock->domain.geometry.aperture_start = MOCK_APERTURE_START; + mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST; + mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE; + xa_init(&mock->pfns); + return &mock->domain; +} + +static void mock_domain_free(struct iommu_domain *domain) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + + WARN_ON(!xa_empty(&mock->pfns)); + kfree(mock); +} + +static int mock_domain_map_pages(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t pgsize, size_t pgcount, int prot, + gfp_t gfp, size_t *mapped) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + unsigned long flags = MOCK_PFN_START_IOVA; + unsigned long start_iova = iova; + + /* + * xarray does not reliably work with fault injection because it does a + * retry allocation, so put our own failure point. + */ + if (iommufd_should_fail()) + return -ENOENT; + + WARN_ON(iova % MOCK_IO_PAGE_SIZE); + WARN_ON(pgsize % MOCK_IO_PAGE_SIZE); + for (; pgcount; pgcount--) { + size_t cur; + + for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) { + void *old; + + if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) + flags = MOCK_PFN_LAST_IOVA; + old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE, + xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) | + flags), + gfp); + if (xa_is_err(old)) { + for (; start_iova != iova; + start_iova += MOCK_IO_PAGE_SIZE) + xa_erase(&mock->pfns, + start_iova / + MOCK_IO_PAGE_SIZE); + return xa_err(old); + } + WARN_ON(old); + iova += MOCK_IO_PAGE_SIZE; + paddr += MOCK_IO_PAGE_SIZE; + *mapped += MOCK_IO_PAGE_SIZE; + flags = 0; + } + } + return 0; +} + +static size_t mock_domain_unmap_pages(struct iommu_domain *domain, + unsigned long iova, size_t pgsize, + size_t pgcount, + struct iommu_iotlb_gather *iotlb_gather) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + bool first = true; + size_t ret = 0; + void *ent; + + WARN_ON(iova % MOCK_IO_PAGE_SIZE); + WARN_ON(pgsize % MOCK_IO_PAGE_SIZE); + + for (; pgcount; pgcount--) { + size_t cur; + + for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) { + ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + WARN_ON(!ent); + /* + * iommufd generates unmaps that must be a strict + * superset of the map's performend So every starting + * IOVA should have been an iova passed to map, and the + * + * First IOVA must be present and have been a first IOVA + * passed to map_pages + */ + if (first) { + WARN_ON(!(xa_to_value(ent) & + MOCK_PFN_START_IOVA)); + first = false; + } + if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize) + WARN_ON(!(xa_to_value(ent) & + MOCK_PFN_LAST_IOVA)); + + iova += MOCK_IO_PAGE_SIZE; + ret += MOCK_IO_PAGE_SIZE; + } + } + return ret; +} + +static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain, + dma_addr_t iova) +{ + struct mock_iommu_domain *mock = + container_of(domain, struct mock_iommu_domain, domain); + void *ent; + + WARN_ON(iova % MOCK_IO_PAGE_SIZE); + ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + WARN_ON(!ent); + return (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE; +} + +static const struct iommu_ops mock_ops = { + .owner = THIS_MODULE, + .pgsize_bitmap = MOCK_IO_PAGE_SIZE, + .domain_alloc = mock_domain_alloc, + .default_domain_ops = + &(struct iommu_domain_ops){ + .free = mock_domain_free, + .map_pages = mock_domain_map_pages, + .unmap_pages = mock_domain_unmap_pages, + .iova_to_phys = mock_domain_iova_to_phys, + }, +}; + +static inline struct iommufd_hw_pagetable * +get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id, + struct mock_iommu_domain **mock) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_object *obj; + + obj = iommufd_get_object(ucmd->ictx, mockpt_id, + IOMMUFD_OBJ_HW_PAGETABLE); + if (IS_ERR(obj)) + return ERR_CAST(obj); + hwpt = container_of(obj, struct iommufd_hw_pagetable, obj); + if (hwpt->domain->ops != mock_ops.default_domain_ops) { + iommufd_put_object(&hwpt->obj); + return ERR_PTR(-EINVAL); + } + *mock = container_of(hwpt->domain, struct mock_iommu_domain, domain); + return hwpt; +} + +/* Create an hw_pagetable with the mock domain so we can test the domain ops */ +static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd, + struct iommu_test_cmd *cmd) +{ + static struct bus_type mock_bus = { .iommu_ops = &mock_ops }; + struct iommufd_hw_pagetable *hwpt; + struct selftest_obj *sobj; + struct iommufd_ioas *ioas; + int rc; + + ioas = iommufd_get_ioas(ucmd, cmd->id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + sobj = iommufd_object_alloc(ucmd->ictx, sobj, IOMMUFD_OBJ_SELFTEST); + if (IS_ERR(sobj)) { + rc = PTR_ERR(sobj); + goto out_ioas; + } + sobj->idev.ictx = ucmd->ictx; + sobj->type = TYPE_IDEV; + sobj->idev.mock_dev.bus = &mock_bus; + + hwpt = iommufd_device_selftest_attach(ucmd->ictx, ioas, + &sobj->idev.mock_dev); + if (IS_ERR(hwpt)) { + rc = PTR_ERR(hwpt); + goto out_sobj; + } + sobj->idev.hwpt = hwpt; + + /* Userspace must destroy both of these IDs to destroy the object */ + cmd->mock_domain.out_hwpt_id = hwpt->obj.id; + cmd->mock_domain.out_device_id = sobj->obj.id; + iommufd_object_finalize(ucmd->ictx, &sobj->obj); + iommufd_put_object(&ioas->obj); + return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + +out_sobj: + iommufd_object_abort(ucmd->ictx, &sobj->obj); +out_ioas: + iommufd_put_object(&ioas->obj); + return rc; +} + +/* Add an additional reserved IOVA to the IOAS */ +static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd, + unsigned int mockpt_id, + unsigned long start, size_t length) +{ + struct iommufd_ioas *ioas; + int rc; + + ioas = iommufd_get_ioas(ucmd, mockpt_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + down_write(&ioas->iopt.iova_rwsem); + rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL); + up_write(&ioas->iopt.iova_rwsem); + iommufd_put_object(&ioas->obj); + return rc; +} + +/* Check that every pfn under each iova matches the pfn under a user VA */ +static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd, + unsigned int mockpt_id, unsigned long iova, + size_t length, void __user *uptr) +{ + struct iommufd_hw_pagetable *hwpt; + struct mock_iommu_domain *mock; + int rc; + + if (iova % MOCK_IO_PAGE_SIZE || length % MOCK_IO_PAGE_SIZE || + (uintptr_t)uptr % MOCK_IO_PAGE_SIZE) + return -EINVAL; + + hwpt = get_md_pagetable(ucmd, mockpt_id, &mock); + if (IS_ERR(hwpt)) + return PTR_ERR(hwpt); + + for (; length; length -= MOCK_IO_PAGE_SIZE) { + struct page *pages[1]; + unsigned long pfn; + long npages; + void *ent; + + npages = get_user_pages_fast((uintptr_t)uptr & PAGE_MASK, 1, 0, + pages); + if (npages < 0) { + rc = npages; + goto out_put; + } + if (WARN_ON(npages != 1)) { + rc = -EFAULT; + goto out_put; + } + pfn = page_to_pfn(pages[0]); + put_page(pages[0]); + + ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE); + if (!ent || + (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE != + pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) { + rc = -EINVAL; + goto out_put; + } + iova += MOCK_IO_PAGE_SIZE; + uptr += MOCK_IO_PAGE_SIZE; + } + rc = 0; + +out_put: + iommufd_put_object(&hwpt->obj); + return rc; +} + +/* Check that the page ref count matches, to look for missing pin/unpins */ +static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd, + void __user *uptr, size_t length, + unsigned int refs) +{ + if (length % PAGE_SIZE || (uintptr_t)uptr % PAGE_SIZE) + return -EINVAL; + + for (; length; length -= PAGE_SIZE) { + struct page *pages[1]; + long npages; + + npages = get_user_pages_fast((uintptr_t)uptr, 1, 0, pages); + if (npages < 0) + return npages; + if (WARN_ON(npages != 1)) + return -EFAULT; + if (!PageCompound(pages[0])) { + unsigned int count; + + count = page_ref_count(pages[0]); + if (count / GUP_PIN_COUNTING_BIAS != refs) { + put_page(pages[0]); + return -EIO; + } + } + put_page(pages[0]); + uptr += PAGE_SIZE; + } + return 0; +} + +struct selftest_access { + struct iommufd_access *access; + struct file *file; + struct mutex lock; + struct list_head items; + unsigned int next_id; + bool destroying; +}; + +struct selftest_access_item { + struct list_head items_elm; + unsigned long iova; + size_t length; + unsigned int id; +}; + +static const struct file_operations iommfd_test_staccess_fops; + +static struct selftest_access *iommufd_access_get(int fd) +{ + struct file *file; + + file = fget(fd); + if (!file) + return ERR_PTR(-EBADFD); + + if (file->f_op != &iommfd_test_staccess_fops) { + fput(file); + return ERR_PTR(-EBADFD); + } + return file->private_data; +} + +static void iommufd_test_access_unmap(void *data, unsigned long iova, + unsigned long length) +{ + unsigned long iova_last = iova + length - 1; + struct selftest_access *staccess = data; + struct selftest_access_item *item; + struct selftest_access_item *tmp; + + mutex_lock(&staccess->lock); + list_for_each_entry_safe(item, tmp, &staccess->items, items_elm) { + if (iova > item->iova + item->length - 1 || + iova_last < item->iova) + continue; + list_del(&item->items_elm); + iommufd_access_unpin_pages(staccess->access, item->iova, + item->length); + kfree(item); + } + mutex_unlock(&staccess->lock); +} + +static int iommufd_test_access_item_destroy(struct iommufd_ucmd *ucmd, + unsigned int access_id, + unsigned int item_id) +{ + struct selftest_access_item *item; + struct selftest_access *staccess; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + mutex_lock(&staccess->lock); + list_for_each_entry(item, &staccess->items, items_elm) { + if (item->id == item_id) { + list_del(&item->items_elm); + iommufd_access_unpin_pages(staccess->access, item->iova, + item->length); + mutex_unlock(&staccess->lock); + kfree(item); + fput(staccess->file); + return 0; + } + } + mutex_unlock(&staccess->lock); + fput(staccess->file); + return -ENOENT; +} + +static int iommufd_test_staccess_release(struct inode *inode, + struct file *filep) +{ + struct selftest_access *staccess = filep->private_data; + + if (staccess->access) { + iommufd_test_access_unmap(staccess, 0, ULONG_MAX); + iommufd_access_destroy(staccess->access); + } + mutex_destroy(&staccess->lock); + kfree(staccess); + return 0; +} + +static const struct iommufd_access_ops selftest_access_ops_pin = { + .needs_pin_pages = 1, + .unmap = iommufd_test_access_unmap, +}; + +static const struct iommufd_access_ops selftest_access_ops = { + .unmap = iommufd_test_access_unmap, +}; + +static const struct file_operations iommfd_test_staccess_fops = { + .release = iommufd_test_staccess_release, +}; + +static struct selftest_access *iommufd_test_alloc_access(void) +{ + struct selftest_access *staccess; + struct file *filep; + + staccess = kzalloc(sizeof(*staccess), GFP_KERNEL_ACCOUNT); + if (!staccess) + return ERR_PTR(-ENOMEM); + INIT_LIST_HEAD(&staccess->items); + mutex_init(&staccess->lock); + + filep = anon_inode_getfile("[iommufd_test_staccess]", + &iommfd_test_staccess_fops, staccess, + O_RDWR); + if (IS_ERR(filep)) { + kfree(staccess); + return ERR_CAST(filep); + } + staccess->file = filep; + return staccess; +} + +static int iommufd_test_create_access(struct iommufd_ucmd *ucmd, + unsigned int ioas_id, unsigned int flags) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + struct selftest_access *staccess; + struct iommufd_access *access; + int fdno; + int rc; + + if (flags & ~MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES) + return -EOPNOTSUPP; + + staccess = iommufd_test_alloc_access(); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + rc = -ENOMEM; + goto out_free_staccess; + } + + access = iommufd_access_create( + ucmd->ictx, ioas_id, + (flags & MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES) ? + &selftest_access_ops_pin : + &selftest_access_ops, + staccess); + if (IS_ERR(access)) { + rc = PTR_ERR(access); + goto out_put_fdno; + } + cmd->create_access.out_access_fd = fdno; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_destroy; + + staccess->access = access; + fd_install(fdno, staccess->file); + return 0; + +out_destroy: + iommufd_access_destroy(access); +out_put_fdno: + put_unused_fd(fdno); +out_free_staccess: + fput(staccess->file); + return rc; +} + +/* Check that the pages in a page array match the pages in the user VA */ +static int iommufd_test_check_pages(void __user *uptr, struct page **pages, + size_t npages) +{ + for (; npages; npages--) { + struct page *tmp_pages[1]; + long rc; + + rc = get_user_pages_fast((uintptr_t)uptr, 1, 0, tmp_pages); + if (rc < 0) + return rc; + if (WARN_ON(rc != 1)) + return -EFAULT; + put_page(tmp_pages[0]); + if (tmp_pages[0] != *pages) + return -EBADE; + pages++; + uptr += PAGE_SIZE; + } + return 0; +} + +static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd, + unsigned int access_id, unsigned long iova, + size_t length, void __user *uptr, + u32 flags) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + struct selftest_access_item *item; + struct selftest_access *staccess; + struct page **pages; + size_t npages; + int rc; + + /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ + if (length > 16*1024*1024) + return -ENOMEM; + + if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ)) + return -EOPNOTSUPP; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + if (staccess->access->ops != &selftest_access_ops_pin) { + rc = -EOPNOTSUPP; + goto out_put; + } + + if (flags & MOCK_FLAGS_ACCESS_SYZ) + iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, + &cmd->access_pages.iova); + + npages = (ALIGN(iova + length, PAGE_SIZE) - + ALIGN_DOWN(iova, PAGE_SIZE)) / + PAGE_SIZE; + pages = kvcalloc(npages, sizeof(*pages), GFP_KERNEL_ACCOUNT); + if (!pages) { + rc = -ENOMEM; + goto out_put; + } + + /* + * Drivers will need to think very carefully about this locking. The + * core code can do multiple unmaps instantaneously after + * iommufd_access_pin_pages() and *all* the unmaps must not return until + * the range is unpinned. This simple implementation puts a global lock + * around the pin, which may not suit drivers that want this to be a + * performance path. drivers that get this wrong will trigger WARN_ON + * races and cause EDEADLOCK failures to userspace. + */ + mutex_lock(&staccess->lock); + rc = iommufd_access_pin_pages(staccess->access, iova, length, pages, + flags & MOCK_FLAGS_ACCESS_WRITE); + if (rc) + goto out_unlock; + + /* For syzkaller allow uptr to be NULL to skip this check */ + if (uptr) { + rc = iommufd_test_check_pages( + uptr - (iova - ALIGN_DOWN(iova, PAGE_SIZE)), pages, + npages); + if (rc) + goto out_unaccess; + } + + item = kzalloc(sizeof(*item), GFP_KERNEL_ACCOUNT); + if (!item) { + rc = -ENOMEM; + goto out_unaccess; + } + + item->iova = iova; + item->length = length; + item->id = staccess->next_id++; + list_add_tail(&item->items_elm, &staccess->items); + + cmd->access_pages.out_access_pages_id = item->id; + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + if (rc) + goto out_free_item; + goto out_unlock; + +out_free_item: + list_del(&item->items_elm); + kfree(item); +out_unaccess: + iommufd_access_unpin_pages(staccess->access, iova, length); +out_unlock: + mutex_unlock(&staccess->lock); + kvfree(pages); +out_put: + fput(staccess->file); + return rc; +} + +static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd, + unsigned int access_id, unsigned long iova, + size_t length, void __user *ubuf, + unsigned int flags) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + struct selftest_access *staccess; + void *tmp; + int rc; + + /* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */ + if (length > 16*1024*1024) + return -ENOMEM; + + if (flags & ~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH | + MOCK_FLAGS_ACCESS_SYZ)) + return -EOPNOTSUPP; + + staccess = iommufd_access_get(access_id); + if (IS_ERR(staccess)) + return PTR_ERR(staccess); + + tmp = kvzalloc(length, GFP_KERNEL_ACCOUNT); + if (!tmp) { + rc = -ENOMEM; + goto out_put; + } + + if (flags & MOCK_ACCESS_RW_WRITE) { + if (copy_from_user(tmp, ubuf, length)) { + rc = -EFAULT; + goto out_free; + } + } + + if (flags & MOCK_FLAGS_ACCESS_SYZ) + iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt, + &cmd->access_rw.iova); + + rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags); + if (rc) + goto out_free; + if (!(flags & MOCK_ACCESS_RW_WRITE)) { + if (copy_to_user(ubuf, tmp, length)) { + rc = -EFAULT; + goto out_free; + } + } + +out_free: + kvfree(tmp); +out_put: + fput(staccess->file); + return rc; +} +static_assert((unsigned int)MOCK_ACCESS_RW_WRITE == IOMMUFD_ACCESS_RW_WRITE); +static_assert((unsigned int)MOCK_ACCESS_RW_SLOW_PATH == + __IOMMUFD_ACCESS_RW_SLOW_PATH); + +void iommufd_selftest_destroy(struct iommufd_object *obj) +{ + struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj); + + switch (sobj->type) { + case TYPE_IDEV: + iommufd_device_selftest_detach(sobj->idev.ictx, + sobj->idev.hwpt); + break; + } +} + +int iommufd_test(struct iommufd_ucmd *ucmd) +{ + struct iommu_test_cmd *cmd = ucmd->cmd; + + switch (cmd->op) { + case IOMMU_TEST_OP_ADD_RESERVED: + return iommufd_test_add_reserved(ucmd, cmd->id, + cmd->add_reserved.start, + cmd->add_reserved.length); + case IOMMU_TEST_OP_MOCK_DOMAIN: + return iommufd_test_mock_domain(ucmd, cmd); + case IOMMU_TEST_OP_MD_CHECK_MAP: + return iommufd_test_md_check_pa( + ucmd, cmd->id, cmd->check_map.iova, + cmd->check_map.length, + u64_to_user_ptr(cmd->check_map.uptr)); + case IOMMU_TEST_OP_MD_CHECK_REFS: + return iommufd_test_md_check_refs( + ucmd, u64_to_user_ptr(cmd->check_refs.uptr), + cmd->check_refs.length, cmd->check_refs.refs); + case IOMMU_TEST_OP_CREATE_ACCESS: + return iommufd_test_create_access(ucmd, cmd->id, + cmd->create_access.flags); + case IOMMU_TEST_OP_ACCESS_PAGES: + return iommufd_test_access_pages( + ucmd, cmd->id, cmd->access_pages.iova, + cmd->access_pages.length, + u64_to_user_ptr(cmd->access_pages.uptr), + cmd->access_pages.flags); + case IOMMU_TEST_OP_ACCESS_RW: + return iommufd_test_access_rw( + ucmd, cmd->id, cmd->access_rw.iova, + cmd->access_rw.length, + u64_to_user_ptr(cmd->access_rw.uptr), + cmd->access_rw.flags); + case IOMMU_TEST_OP_DESTROY_ACCESS_PAGES: + return iommufd_test_access_item_destroy( + ucmd, cmd->id, cmd->destroy_access_pages.access_pages_id); + case IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT: + /* Protect _batch_init(), can not be less than elmsz */ + if (cmd->memory_limit.limit < + sizeof(unsigned long) + sizeof(u32)) + return -EINVAL; + iommufd_test_memory_limit = cmd->memory_limit.limit; + return 0; + default: + return -EOPNOTSUPP; + } +} + +bool iommufd_should_fail(void) +{ + return should_fail(&fail_iommufd, 1); +} + +void __init iommufd_test_init(void) +{ + dbgfs_root = + fault_create_debugfs_attr("fail_iommufd", NULL, &fail_iommufd); +} + +void iommufd_test_exit(void) +{ + debugfs_remove_recursive(dbgfs_root); +} diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c new file mode 100644 index 0000000000000000000000000000000000000000..3ceca0e8311c39fab2a4a1f06664d7dc4742cc81 --- /dev/null +++ b/drivers/iommu/iommufd/vfio_compat.c @@ -0,0 +1,472 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "iommufd_private.h" + +static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx) +{ + struct iommufd_ioas *ioas = ERR_PTR(-ENODEV); + + xa_lock(&ictx->objects); + if (!ictx->vfio_ioas || !iommufd_lock_obj(&ictx->vfio_ioas->obj)) + goto out_unlock; + ioas = ictx->vfio_ioas; +out_unlock: + xa_unlock(&ictx->objects); + return ioas; +} + +/** + * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use + * @ictx: Context to operate on + * @out_ioas_id: The ioas_id the caller should use + * + * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate + * on since they do not have an IOAS ID input in their ABI. Only attaching a + * group should cause a default creation of the internal ioas, this returns the + * existing ioas if it has already been assigned somehow. + */ +int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id) +{ + struct iommufd_ioas *ioas = NULL; + struct iommufd_ioas *out_ioas; + + ioas = iommufd_ioas_alloc(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + xa_lock(&ictx->objects); + if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj)) + out_ioas = ictx->vfio_ioas; + else { + out_ioas = ioas; + ictx->vfio_ioas = ioas; + } + xa_unlock(&ictx->objects); + + *out_ioas_id = out_ioas->obj.id; + if (out_ioas != ioas) { + iommufd_put_object(&out_ioas->obj); + iommufd_object_abort(ictx, &ioas->obj); + return 0; + } + /* + * An automatically created compat IOAS is treated as a userspace + * created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET, + * and if not manually destroyed it will be destroyed automatically + * at iommufd release. + */ + iommufd_object_finalize(ictx, &ioas->obj); + return 0; +} +EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO); + +int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd) +{ + struct iommu_vfio_ioas *cmd = ucmd->cmd; + struct iommufd_ioas *ioas; + + if (cmd->__reserved) + return -EOPNOTSUPP; + switch (cmd->op) { + case IOMMU_VFIO_IOAS_GET: + ioas = get_compat_ioas(ucmd->ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + cmd->ioas_id = ioas->obj.id; + iommufd_put_object(&ioas->obj); + return iommufd_ucmd_respond(ucmd, sizeof(*cmd)); + + case IOMMU_VFIO_IOAS_SET: + ioas = iommufd_get_ioas(ucmd, cmd->ioas_id); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + xa_lock(&ucmd->ictx->objects); + ucmd->ictx->vfio_ioas = ioas; + xa_unlock(&ucmd->ictx->objects); + iommufd_put_object(&ioas->obj); + return 0; + + case IOMMU_VFIO_IOAS_CLEAR: + xa_lock(&ucmd->ictx->objects); + ucmd->ictx->vfio_ioas = NULL; + xa_unlock(&ucmd->ictx->objects); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd, + void __user *arg) +{ + u32 supported_flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + size_t minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); + struct vfio_iommu_type1_dma_map map; + int iommu_prot = IOMMU_CACHE; + struct iommufd_ioas *ioas; + unsigned long iova; + int rc; + + if (copy_from_user(&map, arg, minsz)) + return -EFAULT; + + if (map.argsz < minsz || map.flags & ~supported_flags) + return -EINVAL; + + if (map.flags & VFIO_DMA_MAP_FLAG_READ) + iommu_prot |= IOMMU_READ; + if (map.flags & VFIO_DMA_MAP_FLAG_WRITE) + iommu_prot |= IOMMU_WRITE; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + /* + * Maps created through the legacy interface always use VFIO compatible + * rlimit accounting. If the user wishes to use the faster user based + * rlimit accounting then they must use the new interface. + */ + iova = map.iova; + rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr), + map.size, iommu_prot, 0); + iommufd_put_object(&ioas->obj); + return rc; +} + +static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd, + void __user *arg) +{ + size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size); + /* + * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is obsoleted by the new + * dirty tracking direction: + * https://lore.kernel.org/kvm/20220731125503.142683-1-yishaih@nvidia.com/ + * https://lore.kernel.org/kvm/20220428210933.3583-1-joao.m.martins@oracle.com/ + */ + u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL; + struct vfio_iommu_type1_dma_unmap unmap; + unsigned long unmapped = 0; + struct iommufd_ioas *ioas; + int rc; + + if (copy_from_user(&unmap, arg, minsz)) + return -EFAULT; + + if (unmap.argsz < minsz || unmap.flags & ~supported_flags) + return -EINVAL; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL) { + if (unmap.iova != 0 || unmap.size != 0) { + rc = -EINVAL; + goto err_put; + } + rc = iopt_unmap_all(&ioas->iopt, &unmapped); + } else { + if (READ_ONCE(ioas->iopt.disable_large_pages)) { + /* + * Create cuts at the start and last of the requested + * range. If the start IOVA is 0 then it doesn't need to + * be cut. + */ + unsigned long iovas[] = { unmap.iova + unmap.size - 1, + unmap.iova - 1 }; + + rc = iopt_cut_iova(&ioas->iopt, iovas, + unmap.iova ? 2 : 1); + if (rc) + goto err_put; + } + rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size, + &unmapped); + } + unmap.size = unmapped; + if (copy_to_user(arg, &unmap, minsz)) + rc = -EFAULT; + +err_put: + iommufd_put_object(&ioas->obj); + return rc; +} + +static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx) +{ + struct iommufd_hw_pagetable *hwpt; + struct iommufd_ioas *ioas; + int rc = 1; + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + mutex_lock(&ioas->mutex); + list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) { + if (!hwpt->enforce_cache_coherency) { + rc = 0; + break; + } + } + mutex_unlock(&ioas->mutex); + + iommufd_put_object(&ioas->obj); + return rc; +} + +static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx, + unsigned long type) +{ + switch (type) { + case VFIO_TYPE1_IOMMU: + case VFIO_TYPE1v2_IOMMU: + case VFIO_UNMAP_ALL: + return 1; + + case VFIO_DMA_CC_IOMMU: + return iommufd_vfio_cc_iommu(ictx); + + /* + * This is obsolete, and to be removed from VFIO. It was an incomplete + * idea that got merged. + * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/ + */ + case VFIO_TYPE1_NESTING_IOMMU: + return 0; + + /* + * VFIO_DMA_MAP_FLAG_VADDR + * https://lore.kernel.org/kvm/1611939252-7240-1-git-send-email-steven.sistare@oracle.com/ + * https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/ + * + * It is hard to see how this could be implemented safely. + */ + case VFIO_UPDATE_VADDR: + default: + return 0; + } +} + +static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type) +{ + struct iommufd_ioas *ioas = NULL; + int rc = 0; + + if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU) + return -EINVAL; + + /* VFIO fails the set_iommu if there is no group */ + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + /* + * The difference between TYPE1 and TYPE1v2 is the ability to unmap in + * the middle of mapped ranges. This is complicated by huge page support + * which creates single large IOPTEs that cannot be split by the iommu + * driver. TYPE1 is very old at this point and likely nothing uses it, + * however it is simple enough to emulate by simply disabling the + * problematic large IOPTEs. Then we can safely unmap within any range. + */ + if (type == VFIO_TYPE1_IOMMU) + rc = iopt_disable_large_pages(&ioas->iopt); + iommufd_put_object(&ioas->obj); + return rc; +} + +static unsigned long iommufd_get_pagesizes(struct iommufd_ioas *ioas) +{ + struct io_pagetable *iopt = &ioas->iopt; + unsigned long pgsize_bitmap = ULONG_MAX; + struct iommu_domain *domain; + unsigned long index; + + down_read(&iopt->domains_rwsem); + xa_for_each(&iopt->domains, index, domain) + pgsize_bitmap &= domain->pgsize_bitmap; + + /* See vfio_update_pgsize_bitmap() */ + if (pgsize_bitmap & ~PAGE_MASK) { + pgsize_bitmap &= PAGE_MASK; + pgsize_bitmap |= PAGE_SIZE; + } + pgsize_bitmap = max(pgsize_bitmap, ioas->iopt.iova_alignment); + up_read(&iopt->domains_rwsem); + return pgsize_bitmap; +} + +static int iommufd_fill_cap_iova(struct iommufd_ioas *ioas, + struct vfio_info_cap_header __user *cur, + size_t avail) +{ + struct vfio_iommu_type1_info_cap_iova_range __user *ucap_iovas = + container_of(cur, + struct vfio_iommu_type1_info_cap_iova_range __user, + header); + struct vfio_iommu_type1_info_cap_iova_range cap_iovas = { + .header = { + .id = VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE, + .version = 1, + }, + }; + struct interval_tree_span_iter span; + + interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0, + ULONG_MAX) { + struct vfio_iova_range range; + + if (!span.is_hole) + continue; + range.start = span.start_hole; + range.end = span.last_hole; + if (avail >= struct_size(&cap_iovas, iova_ranges, + cap_iovas.nr_iovas + 1) && + copy_to_user(&ucap_iovas->iova_ranges[cap_iovas.nr_iovas], + &range, sizeof(range))) + return -EFAULT; + cap_iovas.nr_iovas++; + } + if (avail >= struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas) && + copy_to_user(ucap_iovas, &cap_iovas, sizeof(cap_iovas))) + return -EFAULT; + return struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas); +} + +static int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas, + struct vfio_info_cap_header __user *cur, + size_t avail) +{ + struct vfio_iommu_type1_info_dma_avail cap_dma = { + .header = { + .id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL, + .version = 1, + }, + /* + * iommufd's limit is based on the cgroup's memory limit. + * Normally vfio would return U16_MAX here, and provide a module + * parameter to adjust it. Since S390 qemu userspace actually + * pays attention and needs a value bigger than U16_MAX return + * U32_MAX. + */ + .avail = U32_MAX, + }; + + if (avail >= sizeof(cap_dma) && + copy_to_user(cur, &cap_dma, sizeof(cap_dma))) + return -EFAULT; + return sizeof(cap_dma); +} + +static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx, + void __user *arg) +{ + typedef int (*fill_cap_fn)(struct iommufd_ioas *ioas, + struct vfio_info_cap_header __user *cur, + size_t avail); + static const fill_cap_fn fill_fns[] = { + iommufd_fill_cap_dma_avail, + iommufd_fill_cap_iova, + }; + size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes); + struct vfio_info_cap_header __user *last_cap = NULL; + struct vfio_iommu_type1_info info; + struct iommufd_ioas *ioas; + size_t total_cap_size; + int rc; + int i; + + if (copy_from_user(&info, arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + minsz = min_t(size_t, info.argsz, sizeof(info)); + + ioas = get_compat_ioas(ictx); + if (IS_ERR(ioas)) + return PTR_ERR(ioas); + + info.flags = VFIO_IOMMU_INFO_PGSIZES; + info.iova_pgsizes = iommufd_get_pagesizes(ioas); + info.cap_offset = 0; + + down_read(&ioas->iopt.iova_rwsem); + total_cap_size = sizeof(info); + for (i = 0; i != ARRAY_SIZE(fill_fns); i++) { + int cap_size; + + if (info.argsz > total_cap_size) + cap_size = fill_fns[i](ioas, arg + total_cap_size, + info.argsz - total_cap_size); + else + cap_size = fill_fns[i](ioas, NULL, 0); + if (cap_size < 0) { + rc = cap_size; + goto out_put; + } + if (last_cap && info.argsz >= total_cap_size && + put_user(total_cap_size, &last_cap->next)) { + rc = -EFAULT; + goto out_put; + } + last_cap = arg + total_cap_size; + total_cap_size += cap_size; + } + + /* + * If the user did not provide enough space then only some caps are + * returned and the argsz will be updated to the correct amount to get + * all caps. + */ + if (info.argsz >= total_cap_size) + info.cap_offset = sizeof(info); + info.argsz = total_cap_size; + info.flags |= VFIO_IOMMU_INFO_CAPS; + if (copy_to_user(arg, &info, minsz)) { + rc = -EFAULT; + goto out_put; + } + rc = 0; + +out_put: + up_read(&ioas->iopt.iova_rwsem); + iommufd_put_object(&ioas->obj); + return rc; +} + +int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd, + unsigned long arg) +{ + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case VFIO_GET_API_VERSION: + return VFIO_API_VERSION; + case VFIO_SET_IOMMU: + return iommufd_vfio_set_iommu(ictx, arg); + case VFIO_CHECK_EXTENSION: + return iommufd_vfio_check_extension(ictx, arg); + case VFIO_IOMMU_GET_INFO: + return iommufd_vfio_iommu_get_info(ictx, uarg); + case VFIO_IOMMU_MAP_DMA: + return iommufd_vfio_map_dma(ictx, cmd, uarg); + case VFIO_IOMMU_UNMAP_DMA: + return iommufd_vfio_unmap_dma(ictx, cmd, uarg); + case VFIO_IOMMU_DIRTY_PAGES: + default: + return -ENOIOCTLCMD; + } + return -ENOIOCTLCMD; +} diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 3b30c0752274fd43e13ac19908499a6fa1a65e0f..22230cc15dcd1fd17568faaeb6a2cdf3a01346d2 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -628,8 +628,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain, * Something is wrong, we can't attach two devices using * different IOMMUs to the same domain. */ - dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n", - dev_name(mmu->dev), dev_name(domain->mmu->dev)); ret = -EINVAL; } else dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id); diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 2ab2ecfe01f80260ef6c835127ee385249926df3..b383c8327f9cbafde87cf677e7782759f3c86721 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -609,7 +609,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom, dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data); if (!dom->iop) { dev_err(data->dev, "Failed to alloc io pgtable\n"); - return -EINVAL; + return -ENOMEM; } /* Update our support page sizes bitmap */ @@ -668,7 +668,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain, ret = mtk_iommu_domain_finalise(dom, frstdata, region_id); if (ret) { mutex_unlock(&dom->mutex); - return -ENODEV; + return ret; } dom->bank = &data->bank[bankid]; } diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 07ee2600113c20969b8f5b24ae4dfc7fc81be3a3..2fd7702c6709690a770325ac52fa890ecae7410d 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1414,7 +1414,7 @@ static int omap_iommu_attach_init(struct device *dev, odomain->num_iommus = omap_iommu_count(dev); if (!odomain->num_iommus) - return -EINVAL; + return -ENODEV; odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu), GFP_ATOMIC); @@ -1464,7 +1464,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); - return -EINVAL; + return -ENODEV; } spin_lock(&omap_domain->lock); @@ -1472,7 +1472,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) /* only a single client device can be attached to a domain */ if (omap_domain->dev) { dev_err(dev, "iommu domain is already attached\n"); - ret = -EBUSY; + ret = -EINVAL; goto out; } diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index fadd2c907222b9230b49b2ec54f6dbcfdf5914e5..e027933755989d16cc1c1ba1ad611821292b7ed2 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -237,10 +237,8 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain, struct sprd_iommu_domain *dom = to_sprd_domain(domain); size_t pgt_size = sprd_iommu_pgt_size(domain); - if (dom->sdev) { - pr_err("There's already a device attached to this domain.\n"); + if (dom->sdev) return -EINVAL; - } dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL); if (!dom->pgt_va) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index e5ca3cf1a94964e1312b04703c73520a5d0b732d..ed53279d1106c437a3eb0dde68ff9beef7e3f3fd 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -112,7 +112,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, spin_lock(&gart->dom_lock); if (gart->active_domain && gart->active_domain != domain) { - ret = -EBUSY; + ret = -EINVAL; } else if (dev_iommu_priv_get(dev) != domain) { dev_iommu_priv_set(dev, domain); gart->active_domain = domain; diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 8b1b5c270e502c5c15b48e88ffc6fb346f0818e4..5b8fe9bfa9a5b9ce2a78d5f0bfa4bd511b27d07f 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -670,7 +670,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, dev_err(vdev->dev, "granule 0x%lx larger than system page size 0x%lx\n", viommu_page_size, PAGE_SIZE); - return -EINVAL; + return -ENODEV; } ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain, @@ -697,7 +697,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev, if (ret) { ida_free(&viommu->domain_ids, vdomain->id); vdomain->viommu = NULL; - return -EOPNOTSUPP; + return ret; } } @@ -734,8 +734,7 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev) */ ret = viommu_domain_finalise(vdev, domain); } else if (vdomain->viommu != vdev->viommu) { - dev_err(dev, "cannot attach to foreign vIOMMU\n"); - ret = -EXDEV; + ret = -EINVAL; } mutex_unlock(&vdomain->mutex); diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index d07568a2c539fb9410336a7d2e606831d6becbff..caa952c40ff9284ab29b41c8662a4d3c88b17f38 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -538,31 +538,14 @@ config TI_PRUSS_INTC different processors within the SoC. config RISCV_INTC - bool "RISC-V Local Interrupt Controller" + bool depends on RISCV - default y - help - This enables support for the per-HART local interrupt controller - found in standard RISC-V systems. The per-HART local interrupt - controller handles timer interrupts, software interrupts, and - hardware interrupts. Without a per-HART local interrupt controller, - a RISC-V system will be unable to handle any interrupts. - - If you don't know what to do here, say Y. config SIFIVE_PLIC - bool "SiFive Platform-Level Interrupt Controller" + bool depends on RISCV select IRQ_DOMAIN_HIERARCHY select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP - help - This enables support for the PLIC chip found in SiFive (and - potentially other) RISC-V systems. The PLIC controls devices - interrupts and connects them to each core's local interrupt - controller. Aside from timer and software interrupts, all other - interrupt sources are subordinate to the PLIC. - - If you don't know what to do here, say Y. config EXYNOS_IRQ_COMBINER bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c index 6f270c0272fb1b49a0705404dc795a97549dfda5..35c61311e7fd8b65122c993e393c44f133757a12 100644 --- a/drivers/leds/blink/leds-lgm-sso.c +++ b/drivers/leds/blink/leds-lgm-sso.c @@ -635,9 +635,8 @@ __sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled) led->priv = priv; desc = &led->desc; - led->gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL, - fwnode_child, - GPIOD_ASIS, NULL); + led->gpiod = devm_fwnode_gpiod_get(dev, fwnode_child, NULL, + GPIOD_ASIS, NULL); if (IS_ERR(led->gpiod)) { ret = dev_err_probe(dev, PTR_ERR(led->gpiod), "led: get gpio fail!\n"); goto __dt_err; diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c index 092eb59a7d325d29a1ed36b80002a1485d2710f7..ce4e79939731d89d69effdb610ac7cbdfd1d28b0 100644 --- a/drivers/leds/leds-gpio.c +++ b/drivers/leds/leds-gpio.c @@ -151,9 +151,8 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev) * will be updated after LED class device is registered, * Only then the final LED name is known. */ - led.gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL, child, - GPIOD_ASIS, - NULL); + led.gpiod = devm_fwnode_gpiod_get(dev, child, NULL, GPIOD_ASIS, + NULL); if (IS_ERR(led.gpiod)) { fwnode_handle_put(child); return ERR_CAST(led.gpiod); diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c index b70a013139c74becfc22e01daf7b5a8c75fc4a44..905eff1f840ed296c70bf767de330b0dac349b13 100644 --- a/drivers/misc/uacce/uacce.c +++ b/drivers/misc/uacce/uacce.c @@ -108,7 +108,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q) if (!(uacce->flags & UACCE_DEV_SVA)) return 0; - handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL); + handle = iommu_sva_bind_device(uacce->parent, current->mm); if (IS_ERR(handle)) return PTR_ERR(handle); diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 690b69cae4e3a8a25d16f39487bb0dcedac2beab..e708c2d049839f27b578dab501dd47c5b9061bc4 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -671,8 +671,7 @@ free_q: return err; } -int mana_gd_destroy_dma_region(struct gdma_context *gc, - gdma_obj_handle_t dma_region_handle) +int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle) { struct gdma_destroy_dma_region_req req = {}; struct gdma_general_resp resp = {}; diff --git a/drivers/pci/access.c b/drivers/pci/access.c index 708c7529647fd12dcd923063a32a8f03417a4c88..3c230ca3de5844f9e768178617506c3e9442d3c5 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -350,6 +350,11 @@ bool pcie_cap_has_lnkctl(const struct pci_dev *dev) type == PCI_EXP_TYPE_PCIE_BRIDGE; } +bool pcie_cap_has_lnkctl2(const struct pci_dev *dev) +{ + return pcie_cap_has_lnkctl(dev) && pcie_cap_version(dev) > 1; +} + static inline bool pcie_cap_has_sltctl(const struct pci_dev *dev) { return pcie_downstream_port(dev) && @@ -390,10 +395,11 @@ static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos) return pcie_cap_has_rtctl(dev); case PCI_EXP_DEVCAP2: case PCI_EXP_DEVCTL2: + return pcie_cap_version(dev) > 1; case PCI_EXP_LNKCAP2: case PCI_EXP_LNKCTL2: case PCI_EXP_LNKSTA2: - return pcie_cap_version(dev) > 1; + return pcie_cap_has_lnkctl2(dev); default: return false; } diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index c967ad6e262678790583bb3137ff273ec31a16c3..f9cc2e10b676a96a0a61701751fc6b12791405fc 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -382,6 +382,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features) if (!pasid) return -EINVAL; + if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF)) + return -EINVAL; + pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported); supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV; diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 3cef835b375fd6a6adaacd1384987a5f905e9c46..83ae838ceb5f0a3ea93750ea51568eb83bd91764 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -197,6 +197,10 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res, max = avail.end; + /* Don't bother if available space isn't large enough */ + if (size > max - min_used + 1) + continue; + /* Ok, try it out.. */ ret = allocate_resource(r, res, size, min_used, max, align, alignf, alignf_data); diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c index a82f845cc4b521c9b5410ab226d51a585ab1a261..cc83a8925ce03ba3bfee3a9937a72d7961a3f2a9 100644 --- a/drivers/pci/controller/cadence/pci-j721e.c +++ b/drivers/pci/controller/cadence/pci-j721e.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig index f3c462130627640b3a1fa444964d2d95331dd2b5..a0d2713f0e8896ef3936b0774707604ab1e1203b 100644 --- a/drivers/pci/controller/dwc/Kconfig +++ b/drivers/pci/controller/dwc/Kconfig @@ -222,6 +222,15 @@ config PCIE_ARTPEC6_EP Enables support for the PCIe controller in the ARTPEC-6 SoC to work in endpoint mode. This uses the DesignWare core. +config PCIE_BT1 + tristate "Baikal-T1 PCIe controller" + depends on MIPS_BAIKAL_T1 || COMPILE_TEST + depends on PCI_MSI_IRQ_DOMAIN + select PCIE_DW_HOST + help + Enables support for the PCIe controller in the Baikal-T1 SoC to work + in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core. + config PCIE_ROCKCHIP_DW_HOST bool "Rockchip DesignWare PCIe controller" select PCIE_DW diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile index 8ba7b67f5e50a9ab268728cf8563c5143e4f1f55..bf5c311875a1ee388ed6ce1dbc7c66029724a207 100644 --- a/drivers/pci/controller/dwc/Makefile +++ b/drivers/pci/controller/dwc/Makefile @@ -3,6 +3,7 @@ obj-$(CONFIG_PCIE_DW) += pcie-designware.o obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o +obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c index 2616585ca5f8aa0982775c06eeebd1062c918976..1dde5c579edc82a0d38925b45c078a48ba390619 100644 --- a/drivers/pci/controller/dwc/pci-imx6.c +++ b/drivers/pci/controller/dwc/pci-imx6.c @@ -952,12 +952,6 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp) } } - ret = imx6_pcie_deassert_core_reset(imx6_pcie); - if (ret < 0) { - dev_err(dev, "pcie deassert core reset failed: %d\n", ret); - goto err_phy_off; - } - if (imx6_pcie->phy) { ret = phy_power_on(imx6_pcie->phy); if (ret) { @@ -965,6 +959,13 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp) goto err_phy_off; } } + + ret = imx6_pcie_deassert_core_reset(imx6_pcie); + if (ret < 0) { + dev_err(dev, "pcie deassert core reset failed: %d\n", ret); + goto err_phy_off; + } + imx6_setup_phy_mpll(imx6_pcie); return 0; diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c index 879b8692f96a515ac8b85d149ab409dc4506b73c..ed5fb492fe084fbb4f20dc004a275834d57087c5 100644 --- a/drivers/pci/controller/dwc/pci-layerscape.c +++ b/drivers/pci/controller/dwc/pci-layerscape.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c index dc469ef8e99b3b2e3cd625bd83a4ad4ecba5cb4a..5c999e15c357f0a598d631d094afb3818dd708d0 100644 --- a/drivers/pci/controller/dwc/pcie-armada8k.c +++ b/drivers/pci/controller/dwc/pcie-armada8k.c @@ -21,7 +21,6 @@ #include #include #include -#include #include "pcie-designware.h" diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c new file mode 100644 index 0000000000000000000000000000000000000000..3346770e6654794ffd5886f978a9d8e6d22c0c05 --- /dev/null +++ b/drivers/pci/controller/dwc/pcie-bt1.c @@ -0,0 +1,643 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC + * + * Authors: + * Vadim Vlasov + * Serge Semin + * + * Baikal-T1 PCIe controller driver + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pcie-designware.h" + +/* Baikal-T1 System CCU control registers */ +#define BT1_CCU_PCIE_CLKC 0x140 +#define BT1_CCU_PCIE_REQ_PCS_CLK BIT(16) +#define BT1_CCU_PCIE_REQ_MAC_CLK BIT(17) +#define BT1_CCU_PCIE_REQ_PIPE_CLK BIT(18) + +#define BT1_CCU_PCIE_RSTC 0x144 +#define BT1_CCU_PCIE_REQ_LINK_RST BIT(13) +#define BT1_CCU_PCIE_REQ_SMLH_RST BIT(14) +#define BT1_CCU_PCIE_REQ_PHY_RST BIT(16) +#define BT1_CCU_PCIE_REQ_CORE_RST BIT(24) +#define BT1_CCU_PCIE_REQ_STICKY_RST BIT(26) +#define BT1_CCU_PCIE_REQ_NSTICKY_RST BIT(27) + +#define BT1_CCU_PCIE_PMSC 0x148 +#define BT1_CCU_PCIE_LTSSM_STATE_MASK GENMASK(5, 0) +#define BT1_CCU_PCIE_LTSSM_DET_QUIET 0x00 +#define BT1_CCU_PCIE_LTSSM_DET_ACT 0x01 +#define BT1_CCU_PCIE_LTSSM_POLL_ACT 0x02 +#define BT1_CCU_PCIE_LTSSM_POLL_COMP 0x03 +#define BT1_CCU_PCIE_LTSSM_POLL_CONF 0x04 +#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET 0x05 +#define BT1_CCU_PCIE_LTSSM_DET_WAIT 0x06 +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START 0x07 +#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT 0x08 +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT 0x09 +#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT 0x0a +#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE 0x0b +#define BT1_CCU_PCIE_LTSSM_CFG_IDLE 0x0c +#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK 0x0d +#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED 0x0e +#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG 0x0f +#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE 0x10 +#define BT1_CCU_PCIE_LTSSM_L0 0x11 +#define BT1_CCU_PCIE_LTSSM_L0S 0x12 +#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE 0x13 +#define BT1_CCU_PCIE_LTSSM_L1_IDLE 0x14 +#define BT1_CCU_PCIE_LTSSM_L2_IDLE 0x15 +#define BT1_CCU_PCIE_LTSSM_L2_WAKE 0x16 +#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY 0x17 +#define BT1_CCU_PCIE_LTSSM_DIS_IDLE 0x18 +#define BT1_CCU_PCIE_LTSSM_DISABLE 0x19 +#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY 0x1a +#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE 0x1b +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT 0x1c +#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT 0x1d +#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY 0x1e +#define BT1_CCU_PCIE_LTSSM_HOT_RST 0x1f +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0 0x20 +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1 0x21 +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2 0x22 +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3 0x23 +#define BT1_CCU_PCIE_SMLH_LINKUP BIT(6) +#define BT1_CCU_PCIE_RDLH_LINKUP BIT(7) +#define BT1_CCU_PCIE_PM_LINKSTATE_L0S BIT(8) +#define BT1_CCU_PCIE_PM_LINKSTATE_L1 BIT(9) +#define BT1_CCU_PCIE_PM_LINKSTATE_L2 BIT(10) +#define BT1_CCU_PCIE_L1_PENDING BIT(12) +#define BT1_CCU_PCIE_REQ_EXIT_L1 BIT(14) +#define BT1_CCU_PCIE_LTSSM_RCVR_EQ BIT(15) +#define BT1_CCU_PCIE_PM_DSTAT_MASK GENMASK(18, 16) +#define BT1_CCU_PCIE_PM_PME_EN BIT(20) +#define BT1_CCU_PCIE_PM_PME_STATUS BIT(21) +#define BT1_CCU_PCIE_AUX_PM_EN BIT(22) +#define BT1_CCU_PCIE_AUX_PWR_DET BIT(23) +#define BT1_CCU_PCIE_WAKE_DET BIT(24) +#define BT1_CCU_PCIE_TURNOFF_REQ BIT(30) +#define BT1_CCU_PCIE_TURNOFF_ACK BIT(31) + +#define BT1_CCU_PCIE_GENC 0x14c +#define BT1_CCU_PCIE_LTSSM_EN BIT(1) +#define BT1_CCU_PCIE_DBI2_MODE BIT(2) +#define BT1_CCU_PCIE_MGMT_EN BIT(3) +#define BT1_CCU_PCIE_RXLANE_FLIP_EN BIT(16) +#define BT1_CCU_PCIE_TXLANE_FLIP_EN BIT(17) +#define BT1_CCU_PCIE_SLV_XFER_PEND BIT(24) +#define BT1_CCU_PCIE_RCV_XFER_PEND BIT(25) +#define BT1_CCU_PCIE_DBI_XFER_PEND BIT(26) +#define BT1_CCU_PCIE_DMA_XFER_PEND BIT(27) + +#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \ +({ \ + int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \ + __state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \ +}) + +/* Baikal-T1 PCIe specific control registers */ +#define BT1_PCIE_AXI2MGM_LANENUM 0xd04 +#define BT1_PCIE_AXI2MGM_LANESEL_MASK GENMASK(3, 0) + +#define BT1_PCIE_AXI2MGM_ADDRCTL 0xd08 +#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK GENMASK(20, 0) +#define BT1_PCIE_AXI2MGM_READ_FLAG BIT(29) +#define BT1_PCIE_AXI2MGM_DONE BIT(30) +#define BT1_PCIE_AXI2MGM_BUSY BIT(31) + +#define BT1_PCIE_AXI2MGM_WRITEDATA 0xd0c +#define BT1_PCIE_AXI2MGM_WDATA GENMASK(15, 0) + +#define BT1_PCIE_AXI2MGM_READDATA 0xd10 +#define BT1_PCIE_AXI2MGM_RDATA GENMASK(15, 0) + +/* Generic Baikal-T1 PCIe interface resources */ +#define BT1_PCIE_NUM_APP_CLKS ARRAY_SIZE(bt1_pcie_app_clks) +#define BT1_PCIE_NUM_CORE_CLKS ARRAY_SIZE(bt1_pcie_core_clks) +#define BT1_PCIE_NUM_APP_RSTS ARRAY_SIZE(bt1_pcie_app_rsts) +#define BT1_PCIE_NUM_CORE_RSTS ARRAY_SIZE(bt1_pcie_core_rsts) + +/* PCIe bus setup delays and timeouts */ +#define BT1_PCIE_RST_DELAY_MS 100 +#define BT1_PCIE_RUN_DELAY_US 100 +#define BT1_PCIE_REQ_DELAY_US 1 +#define BT1_PCIE_REQ_TIMEOUT_US 1000 +#define BT1_PCIE_LNK_DELAY_US 1000 +#define BT1_PCIE_LNK_TIMEOUT_US 1000000 + +static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = { + DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK, +}; + +static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = { + DW_PCIE_REF_CLK, +}; + +static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = { + DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST, +}; + +static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = { + DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST, + DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST, +}; + +struct bt1_pcie { + struct dw_pcie dw; + struct platform_device *pdev; + struct regmap *sys_regs; +}; +#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw) + +/* + * Baikal-T1 MMIO space must be read/written by the dword-aligned + * instructions. Note the methods are optimized to have the dword operations + * performed with minimum overhead as the most frequently used ones. + */ +static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val) +{ + unsigned int ofs = (uintptr_t)addr & 0x3; + + if (!IS_ALIGNED((uintptr_t)addr, size)) + return -EINVAL; + + *val = readl(addr - ofs) >> ofs * BITS_PER_BYTE; + if (size == 4) { + return 0; + } else if (size == 2) { + *val &= 0xffff; + return 0; + } else if (size == 1) { + *val &= 0xff; + return 0; + } + + return -EINVAL; +} + +static int bt1_pcie_write_mmio(void __iomem *addr, int size, u32 val) +{ + unsigned int ofs = (uintptr_t)addr & 0x3; + u32 tmp, mask; + + if (!IS_ALIGNED((uintptr_t)addr, size)) + return -EINVAL; + + if (size == 4) { + writel(val, addr); + return 0; + } else if (size == 2 || size == 1) { + mask = GENMASK(size * BITS_PER_BYTE - 1, 0); + tmp = readl(addr - ofs) & ~(mask << ofs * BITS_PER_BYTE); + tmp |= (val & mask) << ofs * BITS_PER_BYTE; + writel(tmp, addr - ofs); + return 0; + } + + return -EINVAL; +} + +static u32 bt1_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg, + size_t size) +{ + int ret; + u32 val; + + ret = bt1_pcie_read_mmio(base + reg, size, &val); + if (ret) { + dev_err(pci->dev, "Read DBI address failed\n"); + return ~0U; + } + + return val; +} + +static void bt1_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg, + size_t size, u32 val) +{ + int ret; + + ret = bt1_pcie_write_mmio(base + reg, size, val); + if (ret) + dev_err(pci->dev, "Write DBI address failed\n"); +} + +static void bt1_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg, + size_t size, u32 val) +{ + struct bt1_pcie *btpci = to_bt1_pcie(pci); + int ret; + + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_DBI2_MODE, BT1_CCU_PCIE_DBI2_MODE); + + ret = bt1_pcie_write_mmio(base + reg, size, val); + if (ret) + dev_err(pci->dev, "Write DBI2 address failed\n"); + + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_DBI2_MODE, 0); +} + +static int bt1_pcie_start_link(struct dw_pcie *pci) +{ + struct bt1_pcie *btpci = to_bt1_pcie(pci); + u32 val; + int ret; + + /* + * Enable LTSSM and make sure it was able to establish both PHY and + * data links. This procedure shall work fine to reach 2.5 GT/s speed. + */ + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN); + + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val, + (val & BT1_CCU_PCIE_SMLH_LINKUP), + BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US); + if (ret) { + dev_err(pci->dev, "LTSSM failed to set PHY link up\n"); + return ret; + } + + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val, + (val & BT1_CCU_PCIE_RDLH_LINKUP), + BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US); + if (ret) { + dev_err(pci->dev, "LTSSM failed to set data link up\n"); + return ret; + } + + /* + * Activate direct speed change after the link is established in an + * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s). + * This is required at least to get 8.0 GT/s speed. + */ + val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); + val |= PORT_LOGIC_SPEED_CHANGE; + dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); + + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val, + BT1_CCU_PCIE_LTSSM_LINKUP(val), + BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US); + if (ret) + dev_err(pci->dev, "LTSSM failed to get into L0 state\n"); + + return ret; +} + +static void bt1_pcie_stop_link(struct dw_pcie *pci) +{ + struct bt1_pcie *btpci = to_bt1_pcie(pci); + + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_LTSSM_EN, 0); +} + +static const struct dw_pcie_ops bt1_pcie_ops = { + .read_dbi = bt1_pcie_read_dbi, + .write_dbi = bt1_pcie_write_dbi, + .write_dbi2 = bt1_pcie_write_dbi2, + .start_link = bt1_pcie_start_link, + .stop_link = bt1_pcie_stop_link, +}; + +static struct pci_ops bt1_pci_ops = { + .map_bus = dw_pcie_own_conf_map_bus, + .read = pci_generic_config_read32, + .write = pci_generic_config_write32, +}; + +static int bt1_pcie_get_resources(struct bt1_pcie *btpci) +{ + struct device *dev = btpci->dw.dev; + int i; + + /* DBI access is supposed to be performed by the dword-aligned IOs */ + btpci->dw.pp.bridge->ops = &bt1_pci_ops; + + /* These CSRs are in MMIO so we won't check the regmap-methods status */ + btpci->sys_regs = + syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon"); + if (IS_ERR(btpci->sys_regs)) + return dev_err_probe(dev, PTR_ERR(btpci->sys_regs), + "Failed to get syscon\n"); + + /* Make sure all the required resources have been specified */ + for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) { + if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) { + dev_err(dev, "App clocks set is incomplete\n"); + return -ENOENT; + } + } + + for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) { + if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) { + dev_err(dev, "Core clocks set is incomplete\n"); + return -ENOENT; + } + } + + for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) { + if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) { + dev_err(dev, "App resets set is incomplete\n"); + return -ENOENT; + } + } + + for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) { + if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) { + dev_err(dev, "Core resets set is incomplete\n"); + return -ENOENT; + } + } + + return 0; +} + +static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init) +{ + struct device *dev = btpci->dw.dev; + struct dw_pcie *pci = &btpci->dw; + int ret; + + /* Disable LTSSM for sure */ + regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC, + BT1_CCU_PCIE_LTSSM_EN, 0); + + /* + * Application reset controls are trigger-based so assert the core + * resets only. + */ + ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts); + if (ret) + dev_err(dev, "Failed to assert core resets\n"); + + /* + * Clocks are disabled by default at least in accordance with the clk + * enable counter value on init stage. + */ + if (!init) { + clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks); + + clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks); + } + + /* The peripheral devices are unavailable anyway so reset them too */ + gpiod_set_value_cansleep(pci->pe_rst, 1); + + /* Make sure all the resets are settled */ + msleep(BT1_PCIE_RST_DELAY_MS); +} + +/* + * Implements the cold reset procedure in accordance with the reference manual + * and available PM signals. + */ +static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci) +{ + struct device *dev = btpci->dw.dev; + struct dw_pcie *pci = &btpci->dw; + u32 val; + int ret; + + /* First get out of the Power/Hot reset state */ + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert PHY reset\n"); + return ret; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert hot reset\n"); + goto err_assert_pwr_rst; + } + + /* Wait for the PM-core to stop requesting the PHY reset */ + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val, + !(val & BT1_CCU_PCIE_REQ_PHY_RST), + BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US); + if (ret) { + dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n"); + goto err_assert_hot_rst; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert PHY reset\n"); + goto err_assert_hot_rst; + } + + /* Clocks can be now enabled, but the ref one is crucial at this stage */ + ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks); + if (ret) { + dev_err(dev, "Failed to enable app clocks\n"); + goto err_assert_phy_rst; + } + + ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks); + if (ret) { + dev_err(dev, "Failed to enable ref clocks\n"); + goto err_disable_app_clk; + } + + /* Wait for the PM to stop requesting the controller core reset */ + ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val, + !(val & BT1_CCU_PCIE_REQ_CORE_RST), + BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US); + if (ret) { + dev_err(dev, "Timed out waiting for PM to stop core resetting\n"); + goto err_disable_core_clk; + } + + /* PCS-PIPE interface and controller core can be now activated */ + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert PIPE reset\n"); + goto err_disable_core_clk; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert core reset\n"); + goto err_assert_pipe_rst; + } + + /* It's recommended to reset the core and application logic together */ + ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts); + if (ret) { + dev_err(dev, "Failed to reset app domain\n"); + goto err_assert_core_rst; + } + + /* Sticky/Non-sticky CSR flags can be now unreset too */ + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert sticky reset\n"); + goto err_assert_core_rst; + } + + ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc); + if (ret) { + dev_err(dev, "Failed to deassert non-sticky reset\n"); + goto err_assert_sticky_rst; + } + + /* Activate the PCIe bus peripheral devices */ + gpiod_set_value_cansleep(pci->pe_rst, 0); + + /* Make sure the state is settled (LTSSM is still disabled though) */ + usleep_range(BT1_PCIE_RUN_DELAY_US, BT1_PCIE_RUN_DELAY_US + 100); + + return 0; + +err_assert_sticky_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc); + +err_assert_core_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc); + +err_assert_pipe_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc); + +err_disable_core_clk: + clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks); + +err_disable_app_clk: + clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks); + +err_assert_phy_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc); + +err_assert_hot_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc); + +err_assert_pwr_rst: + reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc); + + return ret; +} + +static int bt1_pcie_host_init(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct bt1_pcie *btpci = to_bt1_pcie(pci); + int ret; + + ret = bt1_pcie_get_resources(btpci); + if (ret) + return ret; + + bt1_pcie_full_stop_bus(btpci, true); + + return bt1_pcie_cold_start_bus(btpci); +} + +static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp) +{ + struct dw_pcie *pci = to_dw_pcie_from_pp(pp); + struct bt1_pcie *btpci = to_bt1_pcie(pci); + + bt1_pcie_full_stop_bus(btpci, false); +} + +static const struct dw_pcie_host_ops bt1_pcie_host_ops = { + .host_init = bt1_pcie_host_init, + .host_deinit = bt1_pcie_host_deinit, +}; + +static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev) +{ + struct bt1_pcie *btpci; + + btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL); + if (!btpci) + return ERR_PTR(-ENOMEM); + + btpci->pdev = pdev; + + platform_set_drvdata(pdev, btpci); + + return btpci; +} + +static int bt1_pcie_add_port(struct bt1_pcie *btpci) +{ + struct device *dev = &btpci->pdev->dev; + int ret; + + btpci->dw.version = DW_PCIE_VER_460A; + btpci->dw.dev = dev; + btpci->dw.ops = &bt1_pcie_ops; + + btpci->dw.pp.num_vectors = MAX_MSI_IRQS; + btpci->dw.pp.ops = &bt1_pcie_host_ops; + + dw_pcie_cap_set(&btpci->dw, REQ_RES); + + ret = dw_pcie_host_init(&btpci->dw.pp); + + return dev_err_probe(dev, ret, "Failed to initialize DWC PCIe host\n"); +} + +static void bt1_pcie_del_port(struct bt1_pcie *btpci) +{ + dw_pcie_host_deinit(&btpci->dw.pp); +} + +static int bt1_pcie_probe(struct platform_device *pdev) +{ + struct bt1_pcie *btpci; + + btpci = bt1_pcie_create_data(pdev); + if (IS_ERR(btpci)) + return PTR_ERR(btpci); + + return bt1_pcie_add_port(btpci); +} + +static int bt1_pcie_remove(struct platform_device *pdev) +{ + struct bt1_pcie *btpci = platform_get_drvdata(pdev); + + bt1_pcie_del_port(btpci); + + return 0; +} + +static const struct of_device_id bt1_pcie_of_match[] = { + { .compatible = "baikal,bt1-pcie" }, + {}, +}; +MODULE_DEVICE_TABLE(of, bt1_pcie_of_match); + +static struct platform_driver bt1_pcie_driver = { + .probe = bt1_pcie_probe, + .remove = bt1_pcie_remove, + .driver = { + .name = "bt1-pcie", + .of_match_table = bt1_pcie_of_match, + }, +}; +module_platform_driver(bt1_pcie_driver); + +MODULE_AUTHOR("Serge Semin "); +MODULE_DESCRIPTION("Baikal-T1 PCIe driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c index 83ddb190292e4f5569254d735851f51ad4ef986d..d06654895ebae11886ea8c1ddaff00bfd407b266 100644 --- a/drivers/pci/controller/dwc/pcie-designware-ep.c +++ b/drivers/pci/controller/dwc/pcie-designware-ep.c @@ -13,8 +13,6 @@ #include #include -#include "../../pci.h" - void dw_pcie_ep_linkup(struct dw_pcie_ep *ep) { struct pci_epc *epc = ep->epc; @@ -171,8 +169,8 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type, return -EINVAL; } - ret = dw_pcie_prog_inbound_atu(pci, func_no, free_win, type, - cpu_addr, bar); + ret = dw_pcie_prog_ep_inbound_atu(pci, func_no, free_win, type, + cpu_addr, bar); if (ret < 0) { dev_err(pci->dev, "Failed to program IB window\n"); return ret; @@ -643,7 +641,7 @@ static unsigned int dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap) int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) { struct dw_pcie *pci = to_dw_pcie_from_ep(ep); - unsigned int offset; + unsigned int offset, ptm_cap_base; unsigned int nbars; u8 hdr_type; u32 reg; @@ -659,6 +657,7 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) } offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR); + ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM); dw_pcie_dbi_ro_wr_en(pci); @@ -671,6 +670,22 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep) dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0); } + /* + * PTM responder capability can be disabled only after disabling + * PTM root capability. + */ + if (ptm_cap_base) { + dw_pcie_dbi_ro_wr_en(pci); + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); + reg &= ~PCI_PTM_CAP_ROOT; + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); + + reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP); + reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK); + dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg); + dw_pcie_dbi_ro_wr_dis(pci); + } + dw_pcie_setup(pci); dw_pcie_dbi_ro_wr_dis(pci); @@ -694,23 +709,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) INIT_LIST_HEAD(&ep->func_list); - if (!pci->dbi_base) { - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); - pci->dbi_base = devm_pci_remap_cfg_resource(dev, res); - if (IS_ERR(pci->dbi_base)) - return PTR_ERR(pci->dbi_base); - } - - if (!pci->dbi_base2) { - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi2"); - if (!res) { - pci->dbi_base2 = pci->dbi_base + SZ_4K; - } else { - pci->dbi_base2 = devm_pci_remap_cfg_resource(dev, res); - if (IS_ERR(pci->dbi_base2)) - return PTR_ERR(pci->dbi_base2); - } - } + ret = dw_pcie_get_resources(pci); + if (ret) + return ret; res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space"); if (!res) @@ -739,9 +740,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep) return -ENOMEM; ep->outbound_addr = addr; - if (pci->link_gen < 1) - pci->link_gen = of_pci_get_max_link_speed(np); - epc = devm_pci_epc_create(dev, &epc_ops); if (IS_ERR(epc)) { dev_err(dev, "Failed to create epc device\n"); diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c index 6241d3f4e9d5108c8dfe3418d36749b8d3378866..b4ea065fe92254ed72b578b21e9e46f1a7f1ab08 100644 --- a/drivers/pci/controller/dwc/pcie-designware-host.c +++ b/drivers/pci/controller/dwc/pcie-designware-host.c @@ -16,7 +16,6 @@ #include #include -#include "../../pci.h" #include "pcie-designware.h" static struct pci_ops dw_pcie_ops; @@ -396,6 +395,10 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) raw_spin_lock_init(&pp->lock); + ret = dw_pcie_get_resources(pci); + if (ret) + return ret; + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config"); if (res) { pp->cfg0_size = resource_size(res); @@ -409,13 +412,6 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) return -ENODEV; } - if (!pci->dbi_base) { - res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); - pci->dbi_base = devm_pci_remap_cfg_resource(dev, res); - if (IS_ERR(pci->dbi_base)) - return PTR_ERR(pci->dbi_base); - } - bridge = devm_pci_alloc_host_bridge(dev, 0); if (!bridge) return -ENOMEM; @@ -430,9 +426,6 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp) pp->io_base = pci_pio_to_address(win->res->start); } - if (pci->link_gen < 1) - pci->link_gen = of_pci_get_max_link_speed(np); - /* Set default bus ops */ bridge->ops = &dw_pcie_ops; bridge->child_ops = &dw_child_pcie_ops; @@ -644,12 +637,15 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp) } /* - * Ensure all outbound windows are disabled before proceeding with - * the MEM/IO ranges setups. + * Ensure all out/inbound windows are disabled before proceeding with + * the MEM/IO (dma-)ranges setups. */ for (i = 0; i < pci->num_ob_windows; i++) dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_OB, i); + for (i = 0; i < pci->num_ib_windows; i++) + dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, i); + i = 0; resource_list_for_each_entry(entry, &pp->bridge->windows) { if (resource_type(entry->res) != IORESOURCE_MEM) @@ -686,9 +682,32 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp) } if (pci->num_ob_windows <= i) - dev_warn(pci->dev, "Resources exceed number of ATU entries (%d)\n", + dev_warn(pci->dev, "Ranges exceed outbound iATU size (%d)\n", pci->num_ob_windows); + i = 0; + resource_list_for_each_entry(entry, &pp->bridge->dma_ranges) { + if (resource_type(entry->res) != IORESOURCE_MEM) + continue; + + if (pci->num_ib_windows <= i) + break; + + ret = dw_pcie_prog_inbound_atu(pci, i++, PCIE_ATU_TYPE_MEM, + entry->res->start, + entry->res->start - entry->offset, + resource_size(entry->res)); + if (ret) { + dev_err(pci->dev, "Failed to set DMA range %pr\n", + entry->res); + return ret; + } + } + + if (pci->num_ib_windows <= i) + dev_warn(pci->dev, "Dma-ranges exceed inbound iATU size (%u)\n", + pci->num_ib_windows); + return 0; } diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c index c6725c519a479976f34c4dbb2973eec974fab66b..6d5d619ab2e9412e0c4cd4376433ed67daba7927 100644 --- a/drivers/pci/controller/dwc/pcie-designware.c +++ b/drivers/pci/controller/dwc/pcie-designware.c @@ -10,7 +10,10 @@ #include #include +#include #include +#include +#include #include #include #include @@ -19,6 +22,148 @@ #include "../../pci.h" #include "pcie-designware.h" +static const char * const dw_pcie_app_clks[DW_PCIE_NUM_APP_CLKS] = { + [DW_PCIE_DBI_CLK] = "dbi", + [DW_PCIE_MSTR_CLK] = "mstr", + [DW_PCIE_SLV_CLK] = "slv", +}; + +static const char * const dw_pcie_core_clks[DW_PCIE_NUM_CORE_CLKS] = { + [DW_PCIE_PIPE_CLK] = "pipe", + [DW_PCIE_CORE_CLK] = "core", + [DW_PCIE_AUX_CLK] = "aux", + [DW_PCIE_REF_CLK] = "ref", +}; + +static const char * const dw_pcie_app_rsts[DW_PCIE_NUM_APP_RSTS] = { + [DW_PCIE_DBI_RST] = "dbi", + [DW_PCIE_MSTR_RST] = "mstr", + [DW_PCIE_SLV_RST] = "slv", +}; + +static const char * const dw_pcie_core_rsts[DW_PCIE_NUM_CORE_RSTS] = { + [DW_PCIE_NON_STICKY_RST] = "non-sticky", + [DW_PCIE_STICKY_RST] = "sticky", + [DW_PCIE_CORE_RST] = "core", + [DW_PCIE_PIPE_RST] = "pipe", + [DW_PCIE_PHY_RST] = "phy", + [DW_PCIE_HOT_RST] = "hot", + [DW_PCIE_PWR_RST] = "pwr", +}; + +static int dw_pcie_get_clocks(struct dw_pcie *pci) +{ + int i, ret; + + for (i = 0; i < DW_PCIE_NUM_APP_CLKS; i++) + pci->app_clks[i].id = dw_pcie_app_clks[i]; + + for (i = 0; i < DW_PCIE_NUM_CORE_CLKS; i++) + pci->core_clks[i].id = dw_pcie_core_clks[i]; + + ret = devm_clk_bulk_get_optional(pci->dev, DW_PCIE_NUM_APP_CLKS, + pci->app_clks); + if (ret) + return ret; + + return devm_clk_bulk_get_optional(pci->dev, DW_PCIE_NUM_CORE_CLKS, + pci->core_clks); +} + +static int dw_pcie_get_resets(struct dw_pcie *pci) +{ + int i, ret; + + for (i = 0; i < DW_PCIE_NUM_APP_RSTS; i++) + pci->app_rsts[i].id = dw_pcie_app_rsts[i]; + + for (i = 0; i < DW_PCIE_NUM_CORE_RSTS; i++) + pci->core_rsts[i].id = dw_pcie_core_rsts[i]; + + ret = devm_reset_control_bulk_get_optional_shared(pci->dev, + DW_PCIE_NUM_APP_RSTS, + pci->app_rsts); + if (ret) + return ret; + + ret = devm_reset_control_bulk_get_optional_exclusive(pci->dev, + DW_PCIE_NUM_CORE_RSTS, + pci->core_rsts); + if (ret) + return ret; + + pci->pe_rst = devm_gpiod_get_optional(pci->dev, "reset", GPIOD_OUT_HIGH); + if (IS_ERR(pci->pe_rst)) + return PTR_ERR(pci->pe_rst); + + return 0; +} + +int dw_pcie_get_resources(struct dw_pcie *pci) +{ + struct platform_device *pdev = to_platform_device(pci->dev); + struct device_node *np = dev_of_node(pci->dev); + struct resource *res; + int ret; + + if (!pci->dbi_base) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi"); + pci->dbi_base = devm_pci_remap_cfg_resource(pci->dev, res); + if (IS_ERR(pci->dbi_base)) + return PTR_ERR(pci->dbi_base); + } + + /* DBI2 is mainly useful for the endpoint controller */ + if (!pci->dbi_base2) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi2"); + if (res) { + pci->dbi_base2 = devm_pci_remap_cfg_resource(pci->dev, res); + if (IS_ERR(pci->dbi_base2)) + return PTR_ERR(pci->dbi_base2); + } else { + pci->dbi_base2 = pci->dbi_base + SZ_4K; + } + } + + /* For non-unrolled iATU/eDMA platforms this range will be ignored */ + if (!pci->atu_base) { + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu"); + if (res) { + pci->atu_size = resource_size(res); + pci->atu_base = devm_ioremap_resource(pci->dev, res); + if (IS_ERR(pci->atu_base)) + return PTR_ERR(pci->atu_base); + } else { + pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET; + } + } + + /* Set a default value suitable for at most 8 in and 8 out windows */ + if (!pci->atu_size) + pci->atu_size = SZ_4K; + + /* LLDD is supposed to manually switch the clocks and resets state */ + if (dw_pcie_cap_is(pci, REQ_RES)) { + ret = dw_pcie_get_clocks(pci); + if (ret) + return ret; + + ret = dw_pcie_get_resets(pci); + if (ret) + return ret; + } + + if (pci->link_gen < 1) + pci->link_gen = of_pci_get_max_link_speed(np); + + of_property_read_u32(np, "num-lanes", &pci->num_lanes); + + if (of_property_read_bool(np, "snps,enable-cdm-check")) + dw_pcie_cap_set(pci, CDM_CHECK); + + return 0; +} + void dw_pcie_version_detect(struct dw_pcie *pci) { u32 ver; @@ -211,7 +356,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val) static inline void __iomem *dw_pcie_select_atu(struct dw_pcie *pci, u32 dir, u32 index) { - if (pci->iatu_unroll_enabled) + if (dw_pcie_cap_is(pci, IATU_UNROLL)) return pci->atu_base + PCIE_ATU_UNROLL_BASE(dir, index); dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, dir | index); @@ -393,8 +538,60 @@ static inline void dw_pcie_writel_atu_ib(struct dw_pcie *pci, u32 index, u32 reg dw_pcie_writel_atu(pci, PCIE_ATU_REGION_DIR_IB, index, reg, val); } -int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, - int type, u64 cpu_addr, u8 bar) +int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type, + u64 cpu_addr, u64 pci_addr, u64 size) +{ + u64 limit_addr = pci_addr + size - 1; + u32 retries, val; + + if ((limit_addr & ~pci->region_limit) != (pci_addr & ~pci->region_limit) || + !IS_ALIGNED(cpu_addr, pci->region_align) || + !IS_ALIGNED(pci_addr, pci->region_align) || !size) { + return -EINVAL; + } + + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_BASE, + lower_32_bits(pci_addr)); + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_BASE, + upper_32_bits(pci_addr)); + + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LIMIT, + lower_32_bits(limit_addr)); + if (dw_pcie_ver_is_ge(pci, 460A)) + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_LIMIT, + upper_32_bits(limit_addr)); + + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_TARGET, + lower_32_bits(cpu_addr)); + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_TARGET, + upper_32_bits(cpu_addr)); + + val = type; + if (upper_32_bits(limit_addr) > upper_32_bits(pci_addr) && + dw_pcie_ver_is_ge(pci, 460A)) + val |= PCIE_ATU_INCREASE_REGION_SIZE; + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL1, val); + dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2, PCIE_ATU_ENABLE); + + /* + * Make sure ATU enable takes effect before any subsequent config + * and I/O accesses. + */ + for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) { + val = dw_pcie_readl_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2); + if (val & PCIE_ATU_ENABLE) + return 0; + + mdelay(LINK_WAIT_IATU); + } + + dev_err(pci->dev, "Inbound iATU is not being enabled\n"); + + return -ETIMEDOUT; +} + +int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, + int type, u64 cpu_addr, u8 bar) { u32 retries, val; @@ -448,7 +645,7 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci) } if (retries >= LINK_WAIT_MAX_RETRIES) { - dev_err(pci->dev, "Phy link never came up\n"); + dev_info(pci->dev, "Phy link never came up\n"); return -ETIMEDOUT; } @@ -522,26 +719,21 @@ static void dw_pcie_link_set_max_speed(struct dw_pcie *pci, u32 link_gen) } -static bool dw_pcie_iatu_unroll_enabled(struct dw_pcie *pci) -{ - u32 val; - - val = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT); - if (val == 0xffffffff) - return true; - - return false; -} - -static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci) +void dw_pcie_iatu_detect(struct dw_pcie *pci) { int max_region, ob, ib; u32 val, min, dir; u64 max; - if (pci->iatu_unroll_enabled) { + val = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT); + if (val == 0xFFFFFFFF) { + dw_pcie_cap_set(pci, IATU_UNROLL); + max_region = min((int)pci->atu_size / 512, 256); } else { + pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE; + pci->atu_size = PCIE_ATU_VIEWPORT_SIZE; + dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, 0xFF); max_region = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT) + 1; } @@ -583,46 +775,15 @@ static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci) pci->num_ib_windows = ib; pci->region_align = 1 << fls(min); pci->region_limit = (max << 32) | (SZ_4G - 1); -} -void dw_pcie_iatu_detect(struct dw_pcie *pci) -{ - struct platform_device *pdev = to_platform_device(pci->dev); - - pci->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pci); - if (pci->iatu_unroll_enabled) { - if (!pci->atu_base) { - struct resource *res = - platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu"); - if (res) { - pci->atu_size = resource_size(res); - pci->atu_base = devm_ioremap_resource(pci->dev, res); - } - if (!pci->atu_base || IS_ERR(pci->atu_base)) - pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET; - } - - if (!pci->atu_size) - /* Pick a minimal default, enough for 8 in and 8 out windows */ - pci->atu_size = SZ_4K; - } else { - pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE; - pci->atu_size = PCIE_ATU_VIEWPORT_SIZE; - } - - dw_pcie_iatu_detect_regions(pci); - - dev_info(pci->dev, "iATU unroll: %s\n", pci->iatu_unroll_enabled ? - "enabled" : "disabled"); - - dev_info(pci->dev, "iATU regions: %u ob, %u ib, align %uK, limit %lluG\n", + dev_info(pci->dev, "iATU: unroll %s, %u ob, %u ib, align %uK, limit %lluG\n", + dw_pcie_cap_is(pci, IATU_UNROLL) ? "T" : "F", pci->num_ob_windows, pci->num_ib_windows, pci->region_align / SZ_1K, (pci->region_limit + 1) / SZ_1G); } void dw_pcie_setup(struct dw_pcie *pci) { - struct device_node *np = pci->dev->of_node; u32 val; if (pci->link_gen > 0) @@ -641,7 +802,7 @@ void dw_pcie_setup(struct dw_pcie *pci) if (pci->n_fts[1]) { val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL); val &= ~PORT_LOGIC_N_FTS_MASK; - val |= pci->n_fts[pci->link_gen - 1]; + val |= pci->n_fts[1]; dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val); } @@ -650,14 +811,13 @@ void dw_pcie_setup(struct dw_pcie *pci) val |= PORT_LINK_DLL_LINK_EN; dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val); - if (of_property_read_bool(np, "snps,enable-cdm-check")) { + if (dw_pcie_cap_is(pci, CDM_CHECK)) { val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS); val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS | PCIE_PL_CHK_REG_CHK_REG_START; dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val); } - of_property_read_u32(np, "num-lanes", &pci->num_lanes); if (!pci->num_lanes) { dev_dbg(pci->dev, "Using h/w default number of lanes\n"); return; diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h index a871ae7eb59eca4cf2ded8ab4368e4227f3c5755..393dfb931df6cc66258b47d02683ccec511efa22 100644 --- a/drivers/pci/controller/dwc/pcie-designware.h +++ b/drivers/pci/controller/dwc/pcie-designware.h @@ -12,10 +12,14 @@ #define _PCIE_DESIGNWARE_H #include +#include +#include #include +#include #include #include #include +#include #include #include @@ -43,6 +47,17 @@ (__dw_pcie_ver_cmp(_pci, _ver, ==) && \ __dw_pcie_ver_cmp(_pci, TYPE_ ## _type, >=)) +/* DWC PCIe controller capabilities */ +#define DW_PCIE_CAP_REQ_RES 0 +#define DW_PCIE_CAP_IATU_UNROLL 1 +#define DW_PCIE_CAP_CDM_CHECK 2 + +#define dw_pcie_cap_is(_pci, _cap) \ + test_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps) + +#define dw_pcie_cap_set(_pci, _cap) \ + set_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps) + /* Parameters for the waiting for link up routine */ #define LINK_WAIT_MAX_RETRIES 10 #define LINK_WAIT_USLEEP_MIN 90000 @@ -222,6 +237,39 @@ enum dw_pcie_device_mode { DW_PCIE_RC_TYPE, }; +enum dw_pcie_app_clk { + DW_PCIE_DBI_CLK, + DW_PCIE_MSTR_CLK, + DW_PCIE_SLV_CLK, + DW_PCIE_NUM_APP_CLKS +}; + +enum dw_pcie_core_clk { + DW_PCIE_PIPE_CLK, + DW_PCIE_CORE_CLK, + DW_PCIE_AUX_CLK, + DW_PCIE_REF_CLK, + DW_PCIE_NUM_CORE_CLKS +}; + +enum dw_pcie_app_rst { + DW_PCIE_DBI_RST, + DW_PCIE_MSTR_RST, + DW_PCIE_SLV_RST, + DW_PCIE_NUM_APP_RSTS +}; + +enum dw_pcie_core_rst { + DW_PCIE_NON_STICKY_RST, + DW_PCIE_STICKY_RST, + DW_PCIE_CORE_RST, + DW_PCIE_PIPE_RST, + DW_PCIE_PHY_RST, + DW_PCIE_HOT_RST, + DW_PCIE_PWR_RST, + DW_PCIE_NUM_CORE_RSTS +}; + struct dw_pcie_host_ops { int (*host_init)(struct dw_pcie_rp *pp); void (*host_deinit)(struct dw_pcie_rp *pp); @@ -317,10 +365,15 @@ struct dw_pcie { const struct dw_pcie_ops *ops; u32 version; u32 type; + unsigned long caps; int num_lanes; int link_gen; u8 n_fts[2]; - bool iatu_unroll_enabled: 1; + struct clk_bulk_data app_clks[DW_PCIE_NUM_APP_CLKS]; + struct clk_bulk_data core_clks[DW_PCIE_NUM_CORE_CLKS]; + struct reset_control_bulk_data app_rsts[DW_PCIE_NUM_APP_RSTS]; + struct reset_control_bulk_data core_rsts[DW_PCIE_NUM_CORE_RSTS]; + struct gpio_desc *pe_rst; }; #define to_dw_pcie_from_pp(port) container_of((port), struct dw_pcie, pp) @@ -328,6 +381,8 @@ struct dw_pcie { #define to_dw_pcie_from_ep(endpoint) \ container_of((endpoint), struct dw_pcie, ep) +int dw_pcie_get_resources(struct dw_pcie *pci); + void dw_pcie_version_detect(struct dw_pcie *pci); u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap); @@ -346,8 +401,10 @@ int dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type, u64 cpu_addr, u64 pci_addr, u64 size); int dw_pcie_prog_ep_outbound_atu(struct dw_pcie *pci, u8 func_no, int index, int type, u64 cpu_addr, u64 pci_addr, u64 size); -int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, - int type, u64 cpu_addr, u8 bar); +int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type, + u64 cpu_addr, u64 pci_addr, u64 size); +int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index, + int type, u64 cpu_addr, u8 bar); void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index); void dw_pcie_setup(struct dw_pcie *pci); void dw_pcie_iatu_detect(struct dw_pcie *pci); diff --git a/drivers/pci/controller/dwc/pcie-histb.c b/drivers/pci/controller/dwc/pcie-histb.c index e2b80f10030d0dbd5312f4a7078c91710ac4f936..43c27812dd6d4d84cd4a72e1f91b0a795fb92700 100644 --- a/drivers/pci/controller/dwc/pcie-histb.c +++ b/drivers/pci/controller/dwc/pcie-histb.c @@ -10,11 +10,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include @@ -60,7 +60,7 @@ struct histb_pcie { struct reset_control *sys_reset; struct reset_control *bus_reset; void __iomem *ctrl; - int reset_gpio; + struct gpio_desc *reset_gpio; struct regulator *vpcie; }; @@ -212,8 +212,8 @@ static void histb_pcie_host_disable(struct histb_pcie *hipcie) clk_disable_unprepare(hipcie->sys_clk); clk_disable_unprepare(hipcie->bus_clk); - if (gpio_is_valid(hipcie->reset_gpio)) - gpio_set_value_cansleep(hipcie->reset_gpio, 0); + if (hipcie->reset_gpio) + gpiod_set_value_cansleep(hipcie->reset_gpio, 1); if (hipcie->vpcie) regulator_disable(hipcie->vpcie); @@ -235,8 +235,8 @@ static int histb_pcie_host_enable(struct dw_pcie_rp *pp) } } - if (gpio_is_valid(hipcie->reset_gpio)) - gpio_set_value_cansleep(hipcie->reset_gpio, 1); + if (hipcie->reset_gpio) + gpiod_set_value_cansleep(hipcie->reset_gpio, 0); ret = clk_prepare_enable(hipcie->bus_clk); if (ret) { @@ -298,10 +298,7 @@ static int histb_pcie_probe(struct platform_device *pdev) struct histb_pcie *hipcie; struct dw_pcie *pci; struct dw_pcie_rp *pp; - struct device_node *np = pdev->dev.of_node; struct device *dev = &pdev->dev; - enum of_gpio_flags of_flags; - unsigned long flag = GPIOF_DIR_OUT; int ret; hipcie = devm_kzalloc(dev, sizeof(*hipcie), GFP_KERNEL); @@ -336,17 +333,19 @@ static int histb_pcie_probe(struct platform_device *pdev) hipcie->vpcie = NULL; } - hipcie->reset_gpio = of_get_named_gpio_flags(np, - "reset-gpios", 0, &of_flags); - if (of_flags & OF_GPIO_ACTIVE_LOW) - flag |= GPIOF_ACTIVE_LOW; - if (gpio_is_valid(hipcie->reset_gpio)) { - ret = devm_gpio_request_one(dev, hipcie->reset_gpio, - flag, "PCIe device power control"); - if (ret) { - dev_err(dev, "unable to request gpio\n"); - return ret; - } + hipcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", + GPIOD_OUT_HIGH); + ret = PTR_ERR_OR_ZERO(hipcie->reset_gpio); + if (ret) { + dev_err(dev, "unable to request reset gpio: %d\n", ret); + return ret; + } + + ret = gpiod_set_consumer_name(hipcie->reset_gpio, + "PCIe device power control"); + if (ret) { + dev_err(dev, "unable to set reset gpio name: %d\n", ret); + return ret; } hipcie->aux_clk = devm_clk_get(dev, "aux"); diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c index f711acacaeaf8f163a399a8922a72685863b223c..38d5d46487bb91883b405223f18b017c9f472ca4 100644 --- a/drivers/pci/controller/dwc/pcie-qcom.c +++ b/drivers/pci/controller/dwc/pcie-qcom.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,7 @@ struct qcom_pcie { union qcom_pcie_resources res; struct phy *phy; struct gpio_desc *reset; + struct icc_path *icc_mem; const struct qcom_pcie_cfg *cfg; }; @@ -1236,7 +1238,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie) ret = reset_control_assert(res->pci_reset); if (ret < 0) { - dev_err(dev, "cannot deassert pci reset\n"); + dev_err(dev, "cannot assert pci reset\n"); goto err_disable_clocks; } @@ -1639,6 +1641,74 @@ static const struct dw_pcie_ops dw_pcie_ops = { .start_link = qcom_pcie_start_link, }; +static int qcom_pcie_icc_init(struct qcom_pcie *pcie) +{ + struct dw_pcie *pci = pcie->pci; + int ret; + + pcie->icc_mem = devm_of_icc_get(pci->dev, "pcie-mem"); + if (IS_ERR(pcie->icc_mem)) + return PTR_ERR(pcie->icc_mem); + + /* + * Some Qualcomm platforms require interconnect bandwidth constraints + * to be set before enabling interconnect clocks. + * + * Set an initial peak bandwidth corresponding to single-lane Gen 1 + * for the pcie-mem path. + */ + ret = icc_set_bw(pcie->icc_mem, 0, MBps_to_icc(250)); + if (ret) { + dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n", + ret); + return ret; + } + + return 0; +} + +static void qcom_pcie_icc_update(struct qcom_pcie *pcie) +{ + struct dw_pcie *pci = pcie->pci; + u32 offset, status, bw; + int speed, width; + int ret; + + if (!pcie->icc_mem) + return; + + offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP); + status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA); + + /* Only update constraints if link is up. */ + if (!(status & PCI_EXP_LNKSTA_DLLLA)) + return; + + speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status); + width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status); + + switch (speed) { + case 1: + bw = MBps_to_icc(250); + break; + case 2: + bw = MBps_to_icc(500); + break; + default: + WARN_ON_ONCE(1); + fallthrough; + case 3: + bw = MBps_to_icc(985); + break; + } + + ret = icc_set_bw(pcie->icc_mem, 0, width * bw); + if (ret) { + dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n", + ret); + } +} + static int qcom_pcie_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1699,6 +1769,10 @@ static int qcom_pcie_probe(struct platform_device *pdev) goto err_pm_runtime_put; } + ret = qcom_pcie_icc_init(pcie); + if (ret) + goto err_pm_runtime_put; + ret = pcie->cfg->ops->get_resources(pcie); if (ret) goto err_pm_runtime_put; @@ -1717,6 +1791,8 @@ static int qcom_pcie_probe(struct platform_device *pdev) goto err_phy_exit; } + qcom_pcie_icc_update(pcie); + return 0; err_phy_exit: diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c index 1b6b437823d22f2fb6770493da737448ed5a58e5..02d78a12b6e72591a6534ce4c5f4f69107b5a85c 100644 --- a/drivers/pci/controller/dwc/pcie-tegra194.c +++ b/drivers/pci/controller/dwc/pcie-tegra194.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c index ba36bbc5897d0023fcbb901ed08bba049602451f..513d8edf3a5cffb19d6a15697f25dd793d39c0cc 100644 --- a/drivers/pci/controller/pci-aardvark.c +++ b/drivers/pci/controller/pci-aardvark.c @@ -1859,20 +1859,18 @@ static int advk_pcie_probe(struct platform_device *pdev) return ret; } - pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node, - "reset-gpios", 0, - GPIOD_OUT_LOW, - "pcie1-reset"); + pcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW); ret = PTR_ERR_OR_ZERO(pcie->reset_gpio); if (ret) { - if (ret == -ENOENT) { - pcie->reset_gpio = NULL; - } else { - if (ret != -EPROBE_DEFER) - dev_err(dev, "Failed to get reset-gpio: %i\n", - ret); - return ret; - } + if (ret != -EPROBE_DEFER) + dev_err(dev, "Failed to get reset-gpio: %i\n", ret); + return ret; + } + + ret = gpiod_set_consumer_name(pcie->reset_gpio, "pcie1-reset"); + if (ret) { + dev_err(dev, "Failed to set reset gpio name: %d\n", ret); + return ret; } ret = of_pci_get_max_link_speed(dev->of_node); diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c index 0cfd9d5a497c9d7c8cbbef2200f9d495b835a1fd..ecd3009df586d19bd82b031769a9be3d76d27ec0 100644 --- a/drivers/pci/controller/pci-ftpci100.c +++ b/drivers/pci/controller/pci-ftpci100.c @@ -553,7 +553,7 @@ static const struct of_device_id faraday_pci_of_match[] = { static struct platform_driver faraday_pci_driver = { .driver = { .name = "ftpci100", - .of_match_table = of_match_ptr(faraday_pci_of_match), + .of_match_table = faraday_pci_of_match, .suppress_bind_attrs = true, }, .probe = faraday_pci_probe, diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 1ced73726a2671a4ae8bf851871c86bc52044180..1dc209f6f53aefa9e013f5290630ff52d21009a7 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -11,14 +11,15 @@ #include #include #include -#include +#include #include +#include +#include #include #include #include #include #include -#include #include #include @@ -1261,9 +1262,8 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie, struct mvebu_pcie_port *port, struct device_node *child) { struct device *dev = &pcie->pdev->dev; - enum of_gpio_flags flags; u32 slot_power_limit; - int reset_gpio, ret; + int ret; u32 num_lanes; port->pcie = pcie; @@ -1327,40 +1327,24 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie, port->name, child); } - reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags); - if (reset_gpio == -EPROBE_DEFER) { - ret = reset_gpio; + port->reset_name = devm_kasprintf(dev, GFP_KERNEL, "%s-reset", + port->name); + if (!port->reset_name) { + ret = -ENOMEM; goto err; } - if (gpio_is_valid(reset_gpio)) { - unsigned long gpio_flags; - - port->reset_name = devm_kasprintf(dev, GFP_KERNEL, "%s-reset", - port->name); - if (!port->reset_name) { - ret = -ENOMEM; + port->reset_gpio = devm_fwnode_gpiod_get(dev, of_fwnode_handle(child), + "reset", GPIOD_OUT_HIGH, + port->name); + ret = PTR_ERR_OR_ZERO(port->reset_gpio); + if (ret) { + if (ret != -ENOENT) goto err; - } - - if (flags & OF_GPIO_ACTIVE_LOW) { - dev_info(dev, "%pOF: reset gpio is active low\n", - child); - gpio_flags = GPIOF_ACTIVE_LOW | - GPIOF_OUT_INIT_LOW; - } else { - gpio_flags = GPIOF_OUT_INIT_HIGH; - } - - ret = devm_gpio_request_one(dev, reset_gpio, gpio_flags, - port->reset_name); - if (ret) { - if (ret == -EPROBE_DEFER) - goto err; - goto skip; - } - - port->reset_gpio = gpio_to_desc(reset_gpio); + /* reset gpio is optional */ + port->reset_gpio = NULL; + devm_kfree(dev, port->reset_name); + port->reset_name = NULL; } slot_power_limit = of_pci_get_slot_power_limit(child, diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c index 8e323e93be91576b3bb988c57696e37d858c4cbd..929f9363e94bec7120b9f83238d964975fc4fe5f 100644 --- a/drivers/pci/controller/pci-tegra.c +++ b/drivers/pci/controller/pci-tegra.c @@ -2202,10 +2202,11 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie) * and in this case fall back to using AFI per port register * to toggle PERST# SFIO line. */ - rp->reset_gpio = devm_gpiod_get_from_of_node(dev, port, - "reset-gpios", 0, - GPIOD_OUT_LOW, - label); + rp->reset_gpio = devm_fwnode_gpiod_get(dev, + of_fwnode_handle(port), + "reset", + GPIOD_OUT_LOW, + label); if (IS_ERR(rp->reset_gpio)) { if (PTR_ERR(rp->reset_gpio) == -ENOENT) { rp->reset_gpio = NULL; diff --git a/drivers/pci/controller/pci-v3-semi.c b/drivers/pci/controller/pci-v3-semi.c index 154a5398633ccb3a539a377583e02c2c0c2a4684..ca44b0c83d1bffbccf078d0182e6bbddcd02ee9b 100644 --- a/drivers/pci/controller/pci-v3-semi.c +++ b/drivers/pci/controller/pci-v3-semi.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -902,7 +901,7 @@ static const struct of_device_id v3_pci_of_match[] = { static struct platform_driver v3_pci_driver = { .driver = { .name = "pci-v3-semi", - .of_match_table = of_match_ptr(v3_pci_of_match), + .of_match_table = v3_pci_of_match, .suppress_bind_attrs = true, }, .probe = v3_pci_probe, diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c index bfa259781b692dc4792332e8474d79df8ad0dd12..d7987b281f7991581f30c61f6aba363d43535eae 100644 --- a/drivers/pci/controller/pci-xgene-msi.c +++ b/drivers/pci/controller/pci-xgene-msi.c @@ -8,9 +8,9 @@ */ #include #include +#include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c index 549d3bd6d1c27c8869ca1e41eab5f3400ad3fdce..887b4941ff32f58f83d1da11e8217345ec0cb5e9 100644 --- a/drivers/pci/controller/pci-xgene.c +++ b/drivers/pci/controller/pci-xgene.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c index 7b1d3ebc34ecd72ee0f7c914e1a71afd432c43d6..65e8a20cc44263451f26564afacb56123b60b967 100644 --- a/drivers/pci/controller/pcie-altera-msi.c +++ b/drivers/pci/controller/pcie-altera-msi.c @@ -9,11 +9,11 @@ #include #include +#include #include #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c index 521acd632f1ac8d65f7e63d5729271049236da4d..edf283e2b5dd017cce84488be0bcea3c51cba959 100644 --- a/drivers/pci/controller/pcie-brcmstb.c +++ b/drivers/pci/controller/pcie-brcmstb.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,8 @@ #define PCIE_RC_DL_MDIO_RD_DATA 0x1108 #define PCIE_MISC_MISC_CTRL 0x4008 +#define PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK 0x80 +#define PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK 0x400 #define PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK 0x1000 #define PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK 0x2000 #define PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK 0x300000 @@ -302,42 +305,34 @@ static u32 brcm_pcie_mdio_form_pkt(int port, int regad, int cmd) /* negative return value indicates error */ static int brcm_pcie_mdio_read(void __iomem *base, u8 port, u8 regad, u32 *val) { - int tries; u32 data; + int err; writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_READ), base + PCIE_RC_DL_MDIO_ADDR); readl(base + PCIE_RC_DL_MDIO_ADDR); - - data = readl(base + PCIE_RC_DL_MDIO_RD_DATA); - for (tries = 0; !MDIO_RD_DONE(data) && tries < 10; tries++) { - udelay(10); - data = readl(base + PCIE_RC_DL_MDIO_RD_DATA); - } - + err = readl_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_RD_DATA, data, + MDIO_RD_DONE(data), 10, 100); *val = FIELD_GET(MDIO_DATA_MASK, data); - return MDIO_RD_DONE(data) ? 0 : -EIO; + + return err; } /* negative return value indicates error */ static int brcm_pcie_mdio_write(void __iomem *base, u8 port, u8 regad, u16 wrdata) { - int tries; u32 data; + int err; writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_WRITE), base + PCIE_RC_DL_MDIO_ADDR); readl(base + PCIE_RC_DL_MDIO_ADDR); writel(MDIO_DATA_DONE_MASK | wrdata, base + PCIE_RC_DL_MDIO_WR_DATA); - data = readl(base + PCIE_RC_DL_MDIO_WR_DATA); - for (tries = 0; !MDIO_WT_DONE(data) && tries < 10; tries++) { - udelay(10); - data = readl(base + PCIE_RC_DL_MDIO_WR_DATA); - } - - return MDIO_WT_DONE(data) ? 0 : -EIO; + err = readw_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_WR_DATA, data, + MDIO_WT_DONE(data), 10, 100); + return err; } /* @@ -445,7 +440,8 @@ static struct irq_chip brcm_msi_irq_chip = { static struct msi_domain_info brcm_msi_domain_info = { /* Multi MSI is supported by the controller, but not by this driver */ - .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS), + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS | + MSI_FLAG_MULTI_PCI_MSI), .chip = &brcm_msi_irq_chip, }; @@ -505,21 +501,23 @@ static struct irq_chip brcm_msi_bottom_irq_chip = { .irq_ack = brcm_msi_ack_irq, }; -static int brcm_msi_alloc(struct brcm_msi *msi) +static int brcm_msi_alloc(struct brcm_msi *msi, unsigned int nr_irqs) { int hwirq; mutex_lock(&msi->lock); - hwirq = bitmap_find_free_region(msi->used, msi->nr, 0); + hwirq = bitmap_find_free_region(msi->used, msi->nr, + order_base_2(nr_irqs)); mutex_unlock(&msi->lock); return hwirq; } -static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq) +static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq, + unsigned int nr_irqs) { mutex_lock(&msi->lock); - bitmap_release_region(msi->used, hwirq, 0); + bitmap_release_region(msi->used, hwirq, order_base_2(nr_irqs)); mutex_unlock(&msi->lock); } @@ -527,16 +525,17 @@ static int brcm_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *args) { struct brcm_msi *msi = domain->host_data; - int hwirq; + int hwirq, i; - hwirq = brcm_msi_alloc(msi); + hwirq = brcm_msi_alloc(msi, nr_irqs); if (hwirq < 0) return hwirq; - irq_domain_set_info(domain, virq, (irq_hw_number_t)hwirq, - &brcm_msi_bottom_irq_chip, domain->host_data, - handle_edge_irq, NULL, NULL); + for (i = 0; i < nr_irqs; i++) + irq_domain_set_info(domain, virq + i, hwirq + i, + &brcm_msi_bottom_irq_chip, domain->host_data, + handle_edge_irq, NULL, NULL); return 0; } @@ -546,7 +545,7 @@ static void brcm_irq_domain_free(struct irq_domain *domain, struct irq_data *d = irq_domain_get_irq_data(domain, virq); struct brcm_msi *msi = irq_data_get_irq_chip_data(d); - brcm_msi_free(msi, d->hwirq); + brcm_msi_free(msi, d->hwirq, nr_irqs); } static const struct irq_domain_ops msi_domain_ops = { @@ -726,7 +725,7 @@ static void __iomem *brcm7425_pcie_map_bus(struct pci_bus *bus, return base + DATA_ADDR(pcie); } -static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val) { u32 tmp, mask = RGR1_SW_INIT_1_INIT_GENERIC_MASK; u32 shift = RGR1_SW_INIT_1_INIT_GENERIC_SHIFT; @@ -736,7 +735,7 @@ static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie)); } -static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val) { u32 tmp, mask = RGR1_SW_INIT_1_INIT_7278_MASK; u32 shift = RGR1_SW_INIT_1_INIT_7278_SHIFT; @@ -746,7 +745,7 @@ static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie)); } -static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val) { if (WARN_ONCE(!pcie->perst_reset, "missing PERST# reset controller\n")) return; @@ -757,7 +756,7 @@ static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val) reset_control_deassert(pcie->perst_reset); } -static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val) { u32 tmp; @@ -767,7 +766,7 @@ static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val) writel(tmp, pcie->base + PCIE_MISC_PCIE_CTRL); } -static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val) +static void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val) { u32 tmp; @@ -776,7 +775,7 @@ static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val) writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie)); } -static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie, +static int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie, u64 *rc_bar2_size, u64 *rc_bar2_offset) { @@ -903,11 +902,16 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie) else burst = 0x2; /* 512 bytes */ - /* Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN */ + /* + * Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN, + * RCB_MPS_MODE, RCB_64B_MODE + */ tmp = readl(base + PCIE_MISC_MISC_CTRL); u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK); u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK); u32p_replace_bits(&tmp, burst, PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK); + u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK); + u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK); writel(tmp, base + PCIE_MISC_MISC_CTRL); ret = brcm_pcie_get_rc_bar2_size_and_offset(pcie, &rc_bar2_size, @@ -1033,8 +1037,15 @@ static int brcm_pcie_start_link(struct brcm_pcie *pcie) pcie->perst_set(pcie, 0); /* - * Give the RC/EP time to wake up, before trying to configure RC. - * Intermittently check status for link-up, up to a total of 100ms. + * Wait for 100ms after PERST# deassertion; see PCIe CEM specification + * sections 2.2, PCIe r5.0, 6.6.1. + */ + msleep(100); + + /* + * Give the RC/EP even more time to wake up, before trying to + * configure RC. Intermittently check status for link-up, up to a + * total of 100ms. */ for (i = 0; i < 100 && !brcm_pcie_link_up(pcie); i += 5) msleep(5); diff --git a/drivers/pci/controller/pcie-iproc-platform.c b/drivers/pci/controller/pcie-iproc-platform.c index 538115246c799ceef5df93af12915dbc977c1dc1..4142a73e611d111553d83d8fbff788d2b1e55a5a 100644 --- a/drivers/pci/controller/pcie-iproc-platform.c +++ b/drivers/pci/controller/pcie-iproc-platform.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c index 2519201b0e51c4cc8d477136ab0b2ce46638cca1..83029bdfd88444966da8215a2d4a2120d254a15f 100644 --- a/drivers/pci/controller/pcie-iproc.c +++ b/drivers/pci/controller/pcie-iproc.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c index 7263d175b5adb7338ac77034aa1e5e1dea2856a4..0ebf7015e9afd79c4de63522e1825dede6fdae8a 100644 --- a/drivers/pci/controller/pcie-microchip-host.c +++ b/drivers/pci/controller/pcie-microchip-host.c @@ -9,10 +9,10 @@ #include #include +#include #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c index 4bd1abf26008f7205f9a19b4772de97a1ea16d40..ee7aad09d6277ef121ec806a4674e7f83fdef135 100644 --- a/drivers/pci/controller/pcie-mt7621.c +++ b/drivers/pci/controller/pcie-mt7621.c @@ -466,7 +466,8 @@ static int mt7621_pcie_register_host(struct pci_host_bridge *host) } static const struct soc_device_attribute mt7621_pcie_quirks_match[] = { - { .soc_id = "mt7621", .revision = "E2" } + { .soc_id = "mt7621", .revision = "E2" }, + { /* sentinel */ } }; static int mt7621_pcie_probe(struct platform_device *pdev) diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c index 7352b5ff8d35981e8b3093f8dbe5e395938db00a..c96c0f454570ce06d8dcde3f5b6426382fc86524 100644 --- a/drivers/pci/controller/pcie-rockchip-host.c +++ b/drivers/pci/controller/pcie-rockchip-host.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c index e4ab48041eb6d48c953d2704af53214ef5814241..4a787a941674bc77f54bfd7ed1428527cd7f2337 100644 --- a/drivers/pci/controller/pcie-xilinx-cpm.c +++ b/drivers/pci/controller/pcie-xilinx-cpm.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c index 40d070e54ad2e4b8ff2744d8ee00b5f5b85d6e0d..176686bdb15c18e5dc951fdc315d00a1bd2142fc 100644 --- a/drivers/pci/controller/pcie-xilinx-nwl.c +++ b/drivers/pci/controller/pcie-xilinx-nwl.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -474,15 +473,15 @@ static int nwl_irq_domain_alloc(struct irq_domain *domain, unsigned int virq, for (i = 0; i < nr_irqs; i++) { irq_domain_set_info(domain, virq + i, bit + i, &nwl_irq_chip, - domain->host_data, handle_simple_irq, - NULL, NULL); + domain->host_data, handle_simple_irq, + NULL, NULL); } mutex_unlock(&msi->lock); return 0; } static void nwl_irq_domain_free(struct irq_domain *domain, unsigned int virq, - unsigned int nr_irqs) + unsigned int nr_irqs) { struct irq_data *data = irq_domain_get_irq_data(domain, virq); struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data); @@ -722,7 +721,6 @@ static int nwl_pcie_bridge_init(struct nwl_pcie *pcie) /* Enable all misc interrupts */ nwl_bridge_writel(pcie, MSGF_MISC_SR_MASKALL, MSGF_MISC_MASK); - /* Disable all legacy interrupts */ nwl_bridge_writel(pcie, (u32)~MSGF_LEG_SR_MASKALL, MSGF_LEG_MASK); diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index e06e9f4fc50f72b40c72a8a7fb1105b5a8fca1bb..769eedeb8802a248d104d206a84e200d5c3ca326 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -719,6 +719,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) resource_size_t offset[2] = {0}; resource_size_t membar2_offset = 0x2000; struct pci_bus *child; + struct pci_dev *dev; int ret; /* @@ -859,8 +860,25 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features) pci_scan_child_bus(vmd->bus); vmd_domain_reset(vmd); - list_for_each_entry(child, &vmd->bus->children, node) - pci_reset_bus(child->self); + + /* When Intel VMD is enabled, the OS does not discover the Root Ports + * owned by Intel VMD within the MMCFG space. pci_reset_bus() applies + * a reset to the parent of the PCI device supplied as argument. This + * is why we pass a child device, so the reset can be triggered at + * the Intel bridge level and propagated to all the children in the + * hierarchy. + */ + list_for_each_entry(child, &vmd->bus->children, node) { + if (!list_empty(&child->devices)) { + dev = list_first_entry(&child->devices, + struct pci_dev, bus_list); + if (pci_reset_bus(dev)) + pci_warn(dev, "can't reset device: %d\n", ret); + + break; + } + } + pci_assign_unassigned_bus_resources(vmd->bus); /* @@ -980,6 +998,11 @@ static int vmd_resume(struct device *dev) struct vmd_dev *vmd = pci_get_drvdata(pdev); int err, i; + if (vmd->irq_domain) + vmd_set_msi_remapping(vmd, true); + else + vmd_set_msi_remapping(vmd, false); + for (i = 0; i < vmd->msix_count; i++) { err = devm_request_irq(dev, vmd->irqs[i].virq, vmd_irq, IRQF_NO_THREAD, diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c index e402f05068a5340f6609ab097b71b24f2775e087..66d9ab2886468d02a32eb40353ad5bb24909f3ac 100644 --- a/drivers/pci/doe.c +++ b/drivers/pci/doe.c @@ -29,6 +29,9 @@ #define PCI_DOE_FLAG_CANCEL 0 #define PCI_DOE_FLAG_DEAD 1 +/* Max data object length is 2^18 dwords */ +#define PCI_DOE_MAX_LENGTH (1 << 18) + /** * struct pci_doe_mb - State for a single DOE mailbox * @@ -107,6 +110,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, { struct pci_dev *pdev = doe_mb->pdev; int offset = doe_mb->cap_offset; + size_t length; u32 val; int i; @@ -123,15 +127,20 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb, if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) return -EIO; + /* Length is 2 DW of header + length of payload in DW */ + length = 2 + task->request_pl_sz / sizeof(u32); + if (length > PCI_DOE_MAX_LENGTH) + return -EIO; + if (length == PCI_DOE_MAX_LENGTH) + length = 0; + /* Write DOE Header */ val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) | FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type); pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val); - /* Length is 2 DW of header + length of payload in DW */ pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, - 2 + task->request_pl_sz / - sizeof(u32))); + length)); for (i = 0; i < task->request_pl_sz / sizeof(u32); i++) pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, task->request_pl[i]); @@ -178,7 +187,10 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0); length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val); - if (length > SZ_1M || length < 2) + /* A value of 0x0 indicates max data object length */ + if (!length) + length = PCI_DOE_MAX_LENGTH; + if (length < 2) return -EIO; /* First 2 dwords have already been read */ diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig index 295a033ee9a2730eae5c500a4f5eb937f324b716..9fd5608868718df40011f408c9c4d8d9940ab796 100644 --- a/drivers/pci/endpoint/functions/Kconfig +++ b/drivers/pci/endpoint/functions/Kconfig @@ -27,13 +27,13 @@ config PCI_EPF_NTB If in doubt, say "N" to disable Endpoint NTB driver. config PCI_EPF_VNTB - tristate "PCI Endpoint NTB driver" - depends on PCI_ENDPOINT - depends on NTB - select CONFIGFS_FS - help - Select this configuration option to enable the Non-Transparent - Bridge (NTB) driver for PCIe Endpoint. NTB driver implements NTB - between PCI Root Port and PCIe Endpoint. + tristate "PCI Endpoint NTB driver" + depends on PCI_ENDPOINT + depends on NTB + select CONFIGFS_FS + help + Select this configuration option to enable the Non-Transparent + Bridge (NTB) driver for PCIe Endpoint. NTB driver implements NTB + between PCI Root Port and PCIe Endpoint. - If in doubt, say "N" to disable Endpoint NTB driver. + If in doubt, say "N" to disable Endpoint NTB driver. diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c index 36b1801a061b7c007839f1dae35cdb3ffe51c358..55283d2379a6a42cd151213cd5eef892ed7406d4 100644 --- a/drivers/pci/endpoint/functions/pci-epf-test.c +++ b/drivers/pci/endpoint/functions/pci-epf-test.c @@ -979,7 +979,7 @@ static int pci_epf_test_bind(struct pci_epf *epf) if (ret) epf_test->dma_supported = false; - if (linkup_notifier) { + if (linkup_notifier || core_init_notifier) { epf->nb.notifier_call = pci_epf_test_notifier; pci_epc_register_notifier(epc, &epf->nb); } else { diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c index 0ea85e1d292ece294055d3dce6f064cfe505823c..04698e7995a54c7f22e7c597d9b93f21b4cff7ea 100644 --- a/drivers/pci/endpoint/functions/pci-epf-vntb.c +++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c @@ -11,7 +11,7 @@ * Author: Kishon Vijay Abraham I */ -/** +/* * +------------+ +---------------------------------------+ * | | | | * +------------+ | +--------------+ @@ -99,20 +99,20 @@ enum epf_ntb_bar { * NTB Driver NTB Driver */ struct epf_ntb_ctrl { - u32 command; - u32 argument; - u16 command_status; - u16 link_status; - u32 topology; - u64 addr; - u64 size; - u32 num_mws; - u32 reserved; - u32 spad_offset; - u32 spad_count; - u32 db_entry_size; - u32 db_data[MAX_DB_COUNT]; - u32 db_offset[MAX_DB_COUNT]; + u32 command; + u32 argument; + u16 command_status; + u16 link_status; + u32 topology; + u64 addr; + u64 size; + u32 num_mws; + u32 reserved; + u32 spad_offset; + u32 spad_count; + u32 db_entry_size; + u32 db_data[MAX_DB_COUNT]; + u32 db_offset[MAX_DB_COUNT]; } __packed; struct epf_ntb { @@ -136,8 +136,7 @@ struct epf_ntb { struct epf_ntb_ctrl *reg; - phys_addr_t epf_db_phy; - void __iomem *epf_db; + u32 *epf_db; phys_addr_t vpci_mw_phy[MAX_MW]; void __iomem *vpci_mw_addr[MAX_MW]; @@ -156,12 +155,14 @@ static struct pci_epf_header epf_ntb_header = { }; /** - * epf_ntb_link_up() - Raise link_up interrupt to Virtual Host + * epf_ntb_link_up() - Raise link_up interrupt to Virtual Host (VHOST) * @ntb: NTB device that facilitates communication between HOST and VHOST * @link_up: true or false indicating Link is UP or Down * * Once NTB function in HOST invoke ntb_link_enable(), - * this NTB function driver will trigger a link event to vhost. + * this NTB function driver will trigger a link event to VHOST. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up) { @@ -175,9 +176,9 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up) } /** - * epf_ntb_configure_mw() - Configure the Outbound Address Space for vhost - * to access the memory window of host - * @ntb: NTB device that facilitates communication between host and vhost + * epf_ntb_configure_mw() - Configure the Outbound Address Space for VHOST + * to access the memory window of HOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * @mw: Index of the memory window (either 0, 1, 2 or 3) * * EP Outbound Window @@ -194,7 +195,9 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up) * | | | | * | | | | * +--------+ +-----------+ - * VHost PCI EP + * VHOST PCI EP + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_configure_mw(struct epf_ntb *ntb, u32 mw) { @@ -219,7 +222,7 @@ static int epf_ntb_configure_mw(struct epf_ntb *ntb, u32 mw) /** * epf_ntb_teardown_mw() - Teardown the configured OB ATU - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * @mw: Index of the memory window (either 0, 1, 2 or 3) * * Teardown the configured OB ATU configured in epf_ntb_configure_mw() using @@ -234,12 +237,12 @@ static void epf_ntb_teardown_mw(struct epf_ntb *ntb, u32 mw) } /** - * epf_ntb_cmd_handler() - Handle commands provided by the NTB Host + * epf_ntb_cmd_handler() - Handle commands provided by the NTB HOST * @work: work_struct for the epf_ntb_epc * * Workqueue function that gets invoked for the two epf_ntb_epc * periodically (once every 5ms) to see if it has received any commands - * from NTB host. The host can send commands to configure doorbell or + * from NTB HOST. The HOST can send commands to configure doorbell or * configure memory window or to update link status. */ static void epf_ntb_cmd_handler(struct work_struct *work) @@ -254,12 +257,10 @@ static void epf_ntb_cmd_handler(struct work_struct *work) ntb = container_of(work, struct epf_ntb, cmd_handler.work); for (i = 1; i < ntb->db_count; i++) { - if (readl(ntb->epf_db + i * 4)) { - if (readl(ntb->epf_db + i * 4)) - ntb->db |= 1 << (i - 1); - + if (ntb->epf_db[i]) { + ntb->db |= 1 << (i - 1); ntb_db_event(&ntb->ntb, i); - writel(0, ntb->epf_db + i * 4); + ntb->epf_db[i] = 0; } } @@ -321,8 +322,8 @@ reset_handler: /** * epf_ntb_config_sspad_bar_clear() - Clear Config + Self scratchpad BAR - * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound - * address. + * @ntb: EPC associated with one of the HOST which holds peer's outbound + * address. * * Clear BAR0 of EP CONTROLLER 1 which contains the HOST1's config and * self scratchpad region (removes inbound ATU configuration). While BAR0 is @@ -331,8 +332,10 @@ reset_handler: * used for self scratchpad from epf_ntb_bar[BAR_CONFIG]. * * Please note the self scratchpad region and config region is combined to - * a single region and mapped using the same BAR. Also note HOST2's peer - * scratchpad is HOST1's self scratchpad. + * a single region and mapped using the same BAR. Also note VHOST's peer + * scratchpad is HOST's self scratchpad. + * + * Returns: void */ static void epf_ntb_config_sspad_bar_clear(struct epf_ntb *ntb) { @@ -347,13 +350,15 @@ static void epf_ntb_config_sspad_bar_clear(struct epf_ntb *ntb) /** * epf_ntb_config_sspad_bar_set() - Set Config + Self scratchpad BAR - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * - * Map BAR0 of EP CONTROLLER 1 which contains the HOST1's config and + * Map BAR0 of EP CONTROLLER which contains the VHOST's config and * self scratchpad region. * * Please note the self scratchpad region and config region is combined to * a single region and mapped using the same BAR. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_config_sspad_bar_set(struct epf_ntb *ntb) { @@ -380,7 +385,7 @@ static int epf_ntb_config_sspad_bar_set(struct epf_ntb *ntb) /** * epf_ntb_config_spad_bar_free() - Free the physical memory associated with * config + scratchpad region - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST */ static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb) { @@ -393,11 +398,13 @@ static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb) /** * epf_ntb_config_spad_bar_alloc() - Allocate memory for config + scratchpad * region - * @ntb: NTB device that facilitates communication between HOST1 and HOST2 + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Allocate the Local Memory mentioned in the above diagram. The size of * CONFIG REGION is sizeof(struct epf_ntb_ctrl) and size of SCRATCHPAD REGION * is obtained from "spad-count" configfs entry. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) { @@ -424,7 +431,7 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) spad_count = ntb->spad_count; ctrl_size = sizeof(struct epf_ntb_ctrl); - spad_size = 2 * spad_count * 4; + spad_size = 2 * spad_count * sizeof(u32); if (!align) { ctrl_size = roundup_pow_of_two(ctrl_size); @@ -454,7 +461,7 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) ctrl->num_mws = ntb->num_mws; ntb->spad_size = spad_size; - ctrl->db_entry_size = 4; + ctrl->db_entry_size = sizeof(u32); for (i = 0; i < ntb->db_count; i++) { ntb->reg->db_data[i] = 1 + i; @@ -465,11 +472,13 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb) } /** - * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capaiblity - * @ntb: NTB device that facilitates communication between HOST and vHOST + * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capability + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Configure MSI/MSI-X capability for each interface with number of * interrupts equal to "db_count" configfs entry. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb) { @@ -511,7 +520,9 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb) /** * epf_ntb_db_bar_init() - Configure Doorbell window BARs - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_db_bar_init(struct epf_ntb *ntb) { @@ -522,7 +533,7 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb) struct pci_epf_bar *epf_bar; void __iomem *mw_addr; enum pci_barno barno; - size_t size = 4 * ntb->db_count; + size_t size = sizeof(u32) * ntb->db_count; epc_features = pci_epc_get_features(ntb->epf->epc, ntb->epf->func_no, @@ -557,7 +568,7 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb) return ret; err_alloc_peer_mem: - pci_epc_mem_free_addr(ntb->epf->epc, epf_bar->phys_addr, mw_addr, epf_bar->size); + pci_epf_free_space(ntb->epf, mw_addr, barno, 0); return -1; } @@ -566,7 +577,7 @@ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws); /** * epf_ntb_db_bar_clear() - Clear doorbell BAR and free memory * allocated in peer's outbound address space - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST */ static void epf_ntb_db_bar_clear(struct epf_ntb *ntb) { @@ -582,8 +593,9 @@ static void epf_ntb_db_bar_clear(struct epf_ntb *ntb) /** * epf_ntb_mw_bar_init() - Configure Memory window BARs - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_mw_bar_init(struct epf_ntb *ntb) { @@ -639,7 +651,7 @@ err_alloc_mem: /** * epf_ntb_mw_bar_clear() - Clear Memory window BARs - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST */ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws) { @@ -662,7 +674,7 @@ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws) /** * epf_ntb_epc_destroy() - Cleanup NTB EPC interface - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Wrapper for epf_ntb_epc_destroy_interface() to cleanup all the NTB interfaces */ @@ -675,7 +687,9 @@ static void epf_ntb_epc_destroy(struct epf_ntb *ntb) /** * epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB * constructs (scratchpad region, doorbell, memorywindow) - * @ntb: NTB device that facilitates communication between HOST and vHOST + * @ntb: NTB device that facilitates communication between HOST and VHOST + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb) { @@ -716,11 +730,13 @@ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb) /** * epf_ntb_epc_init() - Initialize NTB interface - * @ntb: NTB device that facilitates communication between HOST and vHOST2 + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Wrapper to initialize a particular EPC interface and start the workqueue - * to check for commands from host. This function will write to the + * to check for commands from HOST. This function will write to the * EP controller HW for configuring it. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_epc_init(struct epf_ntb *ntb) { @@ -787,7 +803,7 @@ err_config_interrupt: /** * epf_ntb_epc_cleanup() - Cleanup all NTB interfaces - * @ntb: NTB device that facilitates communication between HOST1 and HOST2 + * @ntb: NTB device that facilitates communication between HOST and VHOST * * Wrapper to cleanup all NTB interfaces. */ @@ -951,6 +967,8 @@ static const struct config_item_type ntb_group_type = { * * Add configfs directory specific to NTB. This directory will hold * NTB specific properties like db_count, spad_count, num_mws etc., + * + * Returns: Pointer to config_group */ static struct config_group *epf_ntb_add_cfs(struct pci_epf *epf, struct config_group *group) @@ -1101,11 +1119,11 @@ static int vntb_epf_link_enable(struct ntb_dev *ntb, static u32 vntb_epf_spad_read(struct ntb_dev *ndev, int idx) { struct epf_ntb *ntb = ntb_ndev(ndev); - int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * 4; + int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * sizeof(u32); u32 val; - void __iomem *base = ntb->reg; + void __iomem *base = (void __iomem *)ntb->reg; - val = readl(base + off + ct + idx * 4); + val = readl(base + off + ct + idx * sizeof(u32)); return val; } @@ -1113,10 +1131,10 @@ static int vntb_epf_spad_write(struct ntb_dev *ndev, int idx, u32 val) { struct epf_ntb *ntb = ntb_ndev(ndev); struct epf_ntb_ctrl *ctrl = ntb->reg; - int off = ctrl->spad_offset, ct = ctrl->spad_count * 4; - void __iomem *base = ntb->reg; + int off = ctrl->spad_offset, ct = ctrl->spad_count * sizeof(u32); + void __iomem *base = (void __iomem *)ntb->reg; - writel(val, base + off + ct + idx * 4); + writel(val, base + off + ct + idx * sizeof(u32)); return 0; } @@ -1125,10 +1143,10 @@ static u32 vntb_epf_peer_spad_read(struct ntb_dev *ndev, int pidx, int idx) struct epf_ntb *ntb = ntb_ndev(ndev); struct epf_ntb_ctrl *ctrl = ntb->reg; int off = ctrl->spad_offset; - void __iomem *base = ntb->reg; + void __iomem *base = (void __iomem *)ntb->reg; u32 val; - val = readl(base + off + idx * 4); + val = readl(base + off + idx * sizeof(u32)); return val; } @@ -1137,9 +1155,9 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32 struct epf_ntb *ntb = ntb_ndev(ndev); struct epf_ntb_ctrl *ctrl = ntb->reg; int off = ctrl->spad_offset; - void __iomem *base = ntb->reg; + void __iomem *base = (void __iomem *)ntb->reg; - writel(val, base + off + idx * 4); + writel(val, base + off + idx * sizeof(u32)); return 0; } @@ -1292,6 +1310,8 @@ static struct pci_driver vntb_pci_driver = { * Invoked when a primary interface or secondary interface is bound to EPC * device. This function will succeed only when EPC is bound to both the * interfaces. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_bind(struct pci_epf *epf) { @@ -1377,6 +1397,8 @@ static struct pci_epf_ops epf_ntb_ops = { * * Probe NTB function driver when endpoint function bus detects a NTB * endpoint function. + * + * Returns: Zero for success, or an error code in case of failure */ static int epf_ntb_probe(struct pci_epf *epf) { diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c index 3bc9273d0a08293ca47d564b795be46326f31ea7..2542196e8c3d95ab5b5a92dbec50b3cb63054c09 100644 --- a/drivers/pci/endpoint/pci-epc-core.c +++ b/drivers/pci/endpoint/pci-epc-core.c @@ -724,7 +724,6 @@ void pci_epc_destroy(struct pci_epc *epc) { pci_ep_cfs_remove_epc_group(epc->group); device_unregister(&epc->dev); - kfree(epc); } EXPORT_SYMBOL_GPL(pci_epc_destroy); @@ -746,6 +745,11 @@ void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc) } EXPORT_SYMBOL_GPL(devm_pci_epc_destroy); +static void pci_epc_release(struct device *dev) +{ + kfree(to_pci_epc(dev)); +} + /** * __pci_epc_create() - create a new endpoint controller (EPC) device * @dev: device that is creating the new EPC @@ -779,6 +783,7 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, device_initialize(&epc->dev); epc->dev.class = pci_epc_class; epc->dev.parent = dev; + epc->dev.release = pci_epc_release; epc->ops = ops; ret = dev_set_name(&epc->dev, "%s", dev_name(dev)); diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig index 840a84bb5ee26e54ad6ec4abab52de5e43053715..48113b210cf9384fff963de10799c88366bcdb36 100644 --- a/drivers/pci/hotplug/Kconfig +++ b/drivers/pci/hotplug/Kconfig @@ -6,11 +6,14 @@ menuconfig HOTPLUG_PCI bool "Support for PCI Hotplug" depends on PCI && SYSFS + default y if USB4 help Say Y here if you have a motherboard with a PCI Hotplug controller. This allows you to add and remove PCI cards while the machine is powered up and running. + Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug. + When in doubt, say N. if HOTPLUG_PCI diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO index 88f217c82b4ffd1ae4f9d8671ccce372fcbadba7..fdb8dd6ea24dc8bb6afdfe1c767d3b36f6d52080 100644 --- a/drivers/pci/hotplug/TODO +++ b/drivers/pci/hotplug/TODO @@ -58,9 +58,6 @@ shpchp: pciehp with commit 82a9e79ef132 ("PCI: pciehp: remove hpc_ops"). Clarify if there was a specific reason not to apply the same change to shpchp. -* The ->get_mode1_ECC_cap callback in shpchp_hpc_ops is never invoked. - Why was it introduced? Can it be removed? - * The hardirq handler shpc_isr() queues events on a workqueue. It can be simplified by converting it to threaded IRQ handling. Use pciehp as a template. diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 6efa3d8db9a561907b4a7bfc7d1bc334a7fc7044..5b1f271c6034be045aa446f7e5aacb2d6e3f2e2a 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -411,6 +411,14 @@ static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev) if (dev->is_hotplug_bridge) return; + /* + * In the PCIe case, only Root Ports and Downstream Ports are capable of + * accommodating hotplug devices, so avoid marking Upstream Ports as + * "hotplug bridges". + */ + if (pci_is_pcie(dev) && pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM) + return; + list_for_each_entry(func, &slot->funcs, sibling) { if (PCI_FUNC(dev->devfn) == func->function) { dev->is_hotplug_bridge = 1; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 040ae076ec0e98b0f4731ec0e275514dbd75e2c8..10e9670eea0b06fafb82e945d23d0cbe6e4a9b31 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -811,7 +811,9 @@ static void pcie_enable_notification(struct controller *ctrl) else cmd |= PCI_EXP_SLTCTL_PDCE; if (!pciehp_poll_mode) - cmd |= PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE; + cmd |= PCI_EXP_SLTCTL_HPIE; + if (!pciehp_poll_mode && !NO_CMD_CMPL(ctrl)) + cmd |= PCI_EXP_SLTCTL_CCIE; mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE | PCI_EXP_SLTCTL_PFDE | diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 6e85885b554c5da58d7b7e732488407fa113b4d2..3a97f455336e6729ec6a448b75a8f768789250ec 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -311,7 +311,6 @@ struct hpc_ops { int (*get_latch_status)(struct slot *slot, u8 *status); int (*get_adapter_status)(struct slot *slot, u8 *status); int (*get_adapter_speed)(struct slot *slot, enum pci_bus_speed *speed); - int (*get_mode1_ECC_cap)(struct slot *slot, u8 *mode); int (*get_prog_int)(struct slot *slot, u8 *prog_int); int (*query_power_fault)(struct slot *slot); void (*green_led_on)(struct slot *slot); diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index bd7557ca49108de988fbfa3451ca85393cd03b42..48e4daefc44af42c8bb58339df3d4c1f35f844b0 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -489,23 +489,6 @@ static int hpc_get_adapter_speed(struct slot *slot, enum pci_bus_speed *value) return retval; } -static int hpc_get_mode1_ECC_cap(struct slot *slot, u8 *mode) -{ - int retval = 0; - struct controller *ctrl = slot->ctrl; - u16 sec_bus_status = shpc_readw(ctrl, SEC_BUS_CONFIG); - u8 pi = shpc_readb(ctrl, PROG_INTERFACE); - - if (pi == 2) { - *mode = (sec_bus_status & 0x0100) >> 8; - } else { - retval = -1; - } - - ctrl_dbg(ctrl, "Mode 1 ECC cap = %d\n", *mode); - return retval; -} - static int hpc_query_power_fault(struct slot *slot) { struct controller *ctrl = slot->ctrl; @@ -900,7 +883,6 @@ static const struct hpc_ops shpchp_hpc_ops = { .get_adapter_status = hpc_get_adapter_status, .get_adapter_speed = hpc_get_adapter_speed, - .get_mode1_ECC_cap = hpc_get_mode1_ECC_cap, .get_prog_int = hpc_get_prog_int, .query_power_fault = hpc_query_power_fault, diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c index 12ecd0aaa28d6dbd331f2da7c8aaae4c3ffe22da..0050e8f6814ed655ee06a87252ed468d22963722 100644 --- a/drivers/pci/irq.c +++ b/drivers/pci/irq.c @@ -44,6 +44,8 @@ int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler, va_start(ap, fmt); devname = kvasprintf(GFP_KERNEL, fmt, ap); va_end(ap); + if (!devname) + return -ENOMEM; ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn, irqflags, devname, dev_id); diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index a46fec776ad77b12220358ff2269b9214bd4c924..068d6745bf98cb5f8dc38b46c28dacfa26944a51 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -67,7 +67,7 @@ static acpi_status acpi_match_rc(acpi_handle handle, u32 lvl, void *context, unsigned long long uid; acpi_status status; - status = acpi_evaluate_integer(handle, "_UID", NULL, &uid); + status = acpi_evaluate_integer(handle, METHOD_NAME__UID, NULL, &uid); if (ACPI_FAILURE(status) || uid != *segment) return AE_CTRL_DEPTH; diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index 107d77f3c846708f0eac6c857b757502e2cc6de0..a2ceeacc33eb6e50bb5c6fd27a9019f2bb002ee2 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -646,7 +646,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state) return 0; } -static int pci_legacy_suspend_late(struct device *dev, pm_message_t state) +static int pci_legacy_suspend_late(struct device *dev) { struct pci_dev *pci_dev = to_pci_dev(dev); @@ -848,7 +848,7 @@ static int pci_pm_suspend_noirq(struct device *dev) return 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_suspend_late(dev, PMSG_SUSPEND); + return pci_legacy_suspend_late(dev); if (!pm) { pci_save_state(pci_dev); @@ -1060,7 +1060,7 @@ static int pci_pm_freeze_noirq(struct device *dev) const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_suspend_late(dev, PMSG_FREEZE); + return pci_legacy_suspend_late(dev); if (pm && pm->freeze_noirq) { int error; @@ -1179,7 +1179,7 @@ static int pci_pm_poweroff_noirq(struct device *dev) return 0; if (pci_has_legacy_pm_support(pci_dev)) - return pci_legacy_suspend_late(dev, PMSG_HIBERNATE); + return pci_legacy_suspend_late(dev); if (!pm) { pci_fixup_device(pci_fixup_suspend_late, pci_dev); diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 6c250eb214e80f4962c4298e4918507d9a2bc11a..dd0d9d9bc5097e567c7eeb50b101b0822d0b2589 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1182,11 +1182,9 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) sysfs_bin_attr_init(res_attr); if (write_combine) { - pdev->res_attr_wc[num] = res_attr; sprintf(res_attr_name, "resource%d_wc", num); res_attr->mmap = pci_mmap_resource_wc; } else { - pdev->res_attr[num] = res_attr; sprintf(res_attr_name, "resource%d", num); if (pci_resource_flags(pdev, num) & IORESOURCE_IO) { res_attr->read = pci_read_resource_io; @@ -1204,10 +1202,17 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) res_attr->size = pci_resource_len(pdev, num); res_attr->private = (void *)(unsigned long)num; retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr); - if (retval) + if (retval) { kfree(res_attr); + return retval; + } + + if (write_combine) + pdev->res_attr_wc[num] = res_attr; + else + pdev->res_attr[num] = res_attr; - return retval; + return 0; } /** diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2127aba3550b5d611970bc8d3dccd7d5a60fa301..fba95486caaf2fad52bea9e7176b7ec21c77121d 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -6447,6 +6447,8 @@ bool pci_device_is_present(struct pci_dev *pdev) { u32 v; + /* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */ + pdev = pci_physfn(pdev); if (pci_dev_is_disconnected(pdev)) return false; return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0); @@ -6743,60 +6745,70 @@ static void pci_no_domains(void) } #ifdef CONFIG_PCI_DOMAINS_GENERIC -static atomic_t __domain_nr = ATOMIC_INIT(-1); +static DEFINE_IDA(pci_domain_nr_static_ida); +static DEFINE_IDA(pci_domain_nr_dynamic_ida); -static int pci_get_new_domain_nr(void) +static void of_pci_reserve_static_domain_nr(void) { - return atomic_inc_return(&__domain_nr); + struct device_node *np; + int domain_nr; + + for_each_node_by_type(np, "pci") { + domain_nr = of_get_pci_domain_nr(np); + if (domain_nr < 0) + continue; + /* + * Permanently allocate domain_nr in dynamic_ida + * to prevent it from dynamic allocation. + */ + ida_alloc_range(&pci_domain_nr_dynamic_ida, + domain_nr, domain_nr, GFP_KERNEL); + } } static int of_pci_bus_find_domain_nr(struct device *parent) { - static int use_dt_domains = -1; - int domain = -1; + static bool static_domains_reserved = false; + int domain_nr; - if (parent) - domain = of_get_pci_domain_nr(parent->of_node); + /* On the first call scan device tree for static allocations. */ + if (!static_domains_reserved) { + of_pci_reserve_static_domain_nr(); + static_domains_reserved = true; + } + + if (parent) { + /* + * If domain is in DT, allocate it in static IDA. This + * prevents duplicate static allocations in case of errors + * in DT. + */ + domain_nr = of_get_pci_domain_nr(parent->of_node); + if (domain_nr >= 0) + return ida_alloc_range(&pci_domain_nr_static_ida, + domain_nr, domain_nr, + GFP_KERNEL); + } /* - * Check DT domain and use_dt_domains values. - * - * If DT domain property is valid (domain >= 0) and - * use_dt_domains != 0, the DT assignment is valid since this means - * we have not previously allocated a domain number by using - * pci_get_new_domain_nr(); we should also update use_dt_domains to - * 1, to indicate that we have just assigned a domain number from - * DT. - * - * If DT domain property value is not valid (ie domain < 0), and we - * have not previously assigned a domain number from DT - * (use_dt_domains != 1) we should assign a domain number by - * using the: - * - * pci_get_new_domain_nr() - * - * API and update the use_dt_domains value to keep track of method we - * are using to assign domain numbers (use_dt_domains = 0). - * - * All other combinations imply we have a platform that is trying - * to mix domain numbers obtained from DT and pci_get_new_domain_nr(), - * which is a recipe for domain mishandling and it is prevented by - * invalidating the domain value (domain = -1) and printing a - * corresponding error. + * If domain was not specified in DT, choose a free ID from dynamic + * allocations. All domain numbers from DT are permanently in + * dynamic allocations to prevent assigning them to other DT nodes + * without static domain. */ - if (domain >= 0 && use_dt_domains) { - use_dt_domains = 1; - } else if (domain < 0 && use_dt_domains != 1) { - use_dt_domains = 0; - domain = pci_get_new_domain_nr(); - } else { - if (parent) - pr_err("Node %pOF has ", parent->of_node); - pr_err("Inconsistent \"linux,pci-domain\" property in DT\n"); - domain = -1; - } + return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL); +} - return domain; +static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent) +{ + if (bus->domain_nr < 0) + return; + + /* Release domain from IDA where it was allocated. */ + if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr) + ida_free(&pci_domain_nr_static_ida, bus->domain_nr); + else + ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr); } int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent) @@ -6804,6 +6816,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent) return acpi_disabled ? of_pci_bus_find_domain_nr(parent) : acpi_pci_bus_find_domain_nr(bus); } + +void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent) +{ + if (!acpi_disabled) + return; + of_pci_bus_release_domain_nr(bus, parent); +} #endif /** diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index b1ebb7ab8805170d224b15b292be52b092090457..9ed3b5550043267725f963d12a5dc03a9b469c03 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -15,6 +15,7 @@ extern const unsigned char pcie_link_speed[]; extern bool pci_early_dump; bool pcie_cap_has_lnkctl(const struct pci_dev *dev); +bool pcie_cap_has_lnkctl2(const struct pci_dev *dev); bool pcie_cap_has_rtctl(const struct pci_dev *dev); /* Functions internal to the PCI core code */ diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index 788ac8df3f9dbf9891ceb612af909e7b07dc7805..228652a59f27e0c29ecef7fb337596653dd9e0b4 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -4,6 +4,7 @@ # config PCIEPORTBUS bool "PCI Express Port Bus support" + default y if USB4 help This enables PCI Express Port Bus support. Users can then enable support for Native Hot-Plug, Advanced Error Reporting, Power @@ -15,9 +16,12 @@ config PCIEPORTBUS config HOTPLUG_PCI_PCIE bool "PCI Express Hotplug driver" depends on HOTPLUG_PCI && PCIEPORTBUS + default y if USB4 help - Say Y here if you have a motherboard that supports PCI Express Native - Hotplug + Say Y here if you have a motherboard that supports PCIe native + hotplug. + + Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug. When in doubt, say N. diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile index 5783a2f79e6a2eda315003f6f5cfe1ba510b585f..8de4ed5f98f145065b6c2678f7e09dd184d09a24 100644 --- a/drivers/pci/pcie/Makefile +++ b/drivers/pci/pcie/Makefile @@ -2,7 +2,7 @@ # # Makefile for PCI Express features and port driver -pcieportdrv-y := portdrv_core.o portdrv_pci.o rcec.o +pcieportdrv-y := portdrv.o rcec.o obj-$(CONFIG_PCIEPORTBUS) += pcieportdrv.o diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv.c similarity index 68% rename from drivers/pci/pcie/portdrv_core.c rename to drivers/pci/pcie/portdrv.c index 1ac7fec47d6fb9452641aed61abb95024dfe47c8..2cc2e60bcb3962bd38f5e1c28dabe1854d36a2e5 100644 --- a/drivers/pci/pcie/portdrv_core.c +++ b/drivers/pci/pcie/portdrv.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Purpose: PCI Express Port Bus Driver's Core Functions + * Purpose: PCI Express Port Bus Driver * * Copyright (C) 2004 Intel * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) */ +#include +#include #include #include #include @@ -19,6 +21,15 @@ #include "../pci.h" #include "portdrv.h" +/* + * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must + * be one of the first 32 MSI-X entries. Per PCI r3.0, sec 6.8.3.1, MSI + * supports a maximum of 32 vectors per function. + */ +#define PCIE_PORT_MAX_MSI_ENTRIES 32 + +#define get_descriptor_id(type, service) (((type - 4) << 8) | service) + struct portdrv_service_data { struct pcie_port_service_driver *drv; struct device *dev; @@ -209,6 +220,8 @@ static int get_port_device_capability(struct pci_dev *dev) int services = 0; if (dev->is_hotplug_bridge && + (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) && (pcie_ports_native || host->native_pcie_hotplug)) { services |= PCIE_PORT_SERVICE_HP; @@ -221,7 +234,9 @@ static int get_port_device_capability(struct pci_dev *dev) } #ifdef CONFIG_PCIEAER - if (dev->aer_cap && pci_aer_available() && + if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT || + pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) && + dev->aer_cap && pci_aer_available() && (pcie_ports_native || host->native_aer)) services |= PCIE_PORT_SERVICE_AER; #endif @@ -308,7 +323,7 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq) * Allocate the port extension structure and register services associated with * the port. */ -int pcie_port_device_register(struct pci_dev *dev) +static int pcie_port_device_register(struct pci_dev *dev) { int status, capabilities, i, nr_service; int irqs[PCIE_PORT_DEVICE_MAXSERVICES]; @@ -362,7 +377,7 @@ error_disable: typedef int (*pcie_callback_t)(struct pcie_device *); -int pcie_port_device_iter(struct device *dev, void *data) +static int pcie_port_device_iter(struct device *dev, void *data) { struct pcie_port_service_driver *service_driver; size_t offset = *(size_t *)data; @@ -382,13 +397,13 @@ int pcie_port_device_iter(struct device *dev, void *data) * pcie_port_device_suspend - suspend port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_suspend(struct device *dev) +static int pcie_port_device_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, suspend); return device_for_each_child(dev, &off, pcie_port_device_iter); } -int pcie_port_device_resume_noirq(struct device *dev) +static int pcie_port_device_resume_noirq(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume_noirq); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -398,7 +413,7 @@ int pcie_port_device_resume_noirq(struct device *dev) * pcie_port_device_resume - resume port services associated with a PCIe port * @dev: PCI Express port to handle */ -int pcie_port_device_resume(struct device *dev) +static int pcie_port_device_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, resume); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -408,7 +423,7 @@ int pcie_port_device_resume(struct device *dev) * pcie_port_device_runtime_suspend - runtime suspend port services * @dev: PCI Express port to handle */ -int pcie_port_device_runtime_suspend(struct device *dev) +static int pcie_port_device_runtime_suspend(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -418,7 +433,7 @@ int pcie_port_device_runtime_suspend(struct device *dev) * pcie_port_device_runtime_resume - runtime resume port services * @dev: PCI Express port to handle */ -int pcie_port_device_runtime_resume(struct device *dev) +static int pcie_port_device_runtime_resume(struct device *dev) { size_t off = offsetof(struct pcie_port_service_driver, runtime_resume); return device_for_each_child(dev, &off, pcie_port_device_iter); @@ -482,7 +497,7 @@ EXPORT_SYMBOL_GPL(pcie_port_find_device); * Remove PCI Express port service devices associated with given port and * disable MSI-X or MSI for the port. */ -void pcie_port_device_remove(struct pci_dev *dev) +static void pcie_port_device_remove(struct pci_dev *dev) { device_for_each_child(&dev->dev, NULL, remove_iter); pci_free_irq_vectors(dev); @@ -573,7 +588,6 @@ int pcie_port_service_register(struct pcie_port_service_driver *new) return driver_register(&new->driver); } -EXPORT_SYMBOL(pcie_port_service_register); /** * pcie_port_service_unregister - unregister PCI Express port service driver @@ -583,4 +597,235 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *drv) { driver_unregister(&drv->driver); } -EXPORT_SYMBOL(pcie_port_service_unregister); + +/* If this switch is set, PCIe port native services should not be enabled. */ +bool pcie_ports_disabled; + +/* + * If the user specified "pcie_ports=native", use the PCIe services regardless + * of whether the platform has given us permission. On ACPI systems, this + * means we ignore _OSC. + */ +bool pcie_ports_native; + +/* + * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe + * service even if the platform hasn't given us permission. + */ +bool pcie_ports_dpc_native; + +static int __init pcie_port_setup(char *str) +{ + if (!strncmp(str, "compat", 6)) + pcie_ports_disabled = true; + else if (!strncmp(str, "native", 6)) + pcie_ports_native = true; + else if (!strncmp(str, "dpc-native", 10)) + pcie_ports_dpc_native = true; + + return 1; +} +__setup("pcie_ports=", pcie_port_setup); + +/* global data */ + +#ifdef CONFIG_PM +static int pcie_port_runtime_suspend(struct device *dev) +{ + if (!to_pci_dev(dev)->bridge_d3) + return -EBUSY; + + return pcie_port_device_runtime_suspend(dev); +} + +static int pcie_port_runtime_idle(struct device *dev) +{ + /* + * Assume the PCI core has set bridge_d3 whenever it thinks the port + * should be good to go to D3. Everything else, including moving + * the port to D3, is handled by the PCI core. + */ + return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; +} + +static const struct dev_pm_ops pcie_portdrv_pm_ops = { + .suspend = pcie_port_device_suspend, + .resume_noirq = pcie_port_device_resume_noirq, + .resume = pcie_port_device_resume, + .freeze = pcie_port_device_suspend, + .thaw = pcie_port_device_resume, + .poweroff = pcie_port_device_suspend, + .restore_noirq = pcie_port_device_resume_noirq, + .restore = pcie_port_device_resume, + .runtime_suspend = pcie_port_runtime_suspend, + .runtime_resume = pcie_port_device_runtime_resume, + .runtime_idle = pcie_port_runtime_idle, +}; + +#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) + +#else /* !PM */ + +#define PCIE_PORTDRV_PM_OPS NULL +#endif /* !PM */ + +/* + * pcie_portdrv_probe - Probe PCI-Express port devices + * @dev: PCI-Express port device being probed + * + * If detected invokes the pcie_port_device_register() method for + * this port device. + * + */ +static int pcie_portdrv_probe(struct pci_dev *dev, + const struct pci_device_id *id) +{ + int type = pci_pcie_type(dev); + int status; + + if (!pci_is_pcie(dev) || + ((type != PCI_EXP_TYPE_ROOT_PORT) && + (type != PCI_EXP_TYPE_UPSTREAM) && + (type != PCI_EXP_TYPE_DOWNSTREAM) && + (type != PCI_EXP_TYPE_RC_EC))) + return -ENODEV; + + if (type == PCI_EXP_TYPE_RC_EC) + pcie_link_rcec(dev); + + status = pcie_port_device_register(dev); + if (status) + return status; + + pci_save_state(dev); + + dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE | + DPM_FLAG_SMART_SUSPEND); + + if (pci_bridge_d3_possible(dev)) { + /* + * Keep the port resumed 100ms to make sure things like + * config space accesses from userspace (lspci) will not + * cause the port to repeatedly suspend and resume. + */ + pm_runtime_set_autosuspend_delay(&dev->dev, 100); + pm_runtime_use_autosuspend(&dev->dev); + pm_runtime_mark_last_busy(&dev->dev); + pm_runtime_put_autosuspend(&dev->dev); + pm_runtime_allow(&dev->dev); + } + + return 0; +} + +static void pcie_portdrv_remove(struct pci_dev *dev) +{ + if (pci_bridge_d3_possible(dev)) { + pm_runtime_forbid(&dev->dev); + pm_runtime_get_noresume(&dev->dev); + pm_runtime_dont_use_autosuspend(&dev->dev); + } + + pcie_port_device_remove(dev); +} + +static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, + pci_channel_state_t error) +{ + if (error == pci_channel_io_frozen) + return PCI_ERS_RESULT_NEED_RESET; + return PCI_ERS_RESULT_CAN_RECOVER; +} + +static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) +{ + size_t off = offsetof(struct pcie_port_service_driver, slot_reset); + device_for_each_child(&dev->dev, &off, pcie_port_device_iter); + + pci_restore_state(dev); + pci_save_state(dev); + return PCI_ERS_RESULT_RECOVERED; +} + +static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) +{ + return PCI_ERS_RESULT_RECOVERED; +} + +/* + * LINUX Device Driver Model + */ +static const struct pci_device_id port_pci_ids[] = { + /* handle any PCI-Express port */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, + /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ + { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, + /* handle any Root Complex Event Collector */ + { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, + { }, +}; + +static const struct pci_error_handlers pcie_portdrv_err_handler = { + .error_detected = pcie_portdrv_error_detected, + .slot_reset = pcie_portdrv_slot_reset, + .mmio_enabled = pcie_portdrv_mmio_enabled, +}; + +static struct pci_driver pcie_portdriver = { + .name = "pcieport", + .id_table = &port_pci_ids[0], + + .probe = pcie_portdrv_probe, + .remove = pcie_portdrv_remove, + .shutdown = pcie_portdrv_remove, + + .err_handler = &pcie_portdrv_err_handler, + + .driver_managed_dma = true, + + .driver.pm = PCIE_PORTDRV_PM_OPS, +}; + +static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) +{ + pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", + d->ident); + pcie_pme_disable_msi(); + return 0; +} + +static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { + /* + * Boxes that should not use MSI for PCIe PME signaling. + */ + { + .callback = dmi_pcie_pme_disable_msi, + .ident = "MSI Wind U-100", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, + "MICRO-STAR INTERNATIONAL CO., LTD"), + DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), + }, + }, + {} +}; + +static void __init pcie_init_services(void) +{ + pcie_aer_init(); + pcie_pme_init(); + pcie_dpc_init(); + pcie_hp_init(); +} + +static int __init pcie_portdrv_init(void) +{ + if (pcie_ports_disabled) + return -EACCES; + + pcie_init_services(); + dmi_check_system(pcie_portdrv_dmi_table); + + return pci_register_driver(&pcie_portdriver); +} +device_initcall(pcie_portdrv_init); diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h index 0ef4bf5f811d9d74ee134f1041fc1538fc0f838c..58a2b1a1cae4c03c60416cf72f909c33fecf6d90 100644 --- a/drivers/pci/pcie/portdrv.h +++ b/drivers/pci/pcie/portdrv.h @@ -98,26 +98,7 @@ struct pcie_port_service_driver { int pcie_port_service_register(struct pcie_port_service_driver *new); void pcie_port_service_unregister(struct pcie_port_service_driver *new); -/* - * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must - * be one of the first 32 MSI-X entries. Per PCI r3.0, sec 6.8.3.1, MSI - * supports a maximum of 32 vectors per function. - */ -#define PCIE_PORT_MAX_MSI_ENTRIES 32 - -#define get_descriptor_id(type, service) (((type - 4) << 8) | service) - extern struct bus_type pcie_port_bus_type; -int pcie_port_device_register(struct pci_dev *dev); -int pcie_port_device_iter(struct device *dev, void *data); -#ifdef CONFIG_PM -int pcie_port_device_suspend(struct device *dev); -int pcie_port_device_resume_noirq(struct device *dev); -int pcie_port_device_resume(struct device *dev); -int pcie_port_device_runtime_suspend(struct device *dev); -int pcie_port_device_runtime_resume(struct device *dev); -#endif -void pcie_port_device_remove(struct pci_dev *dev); struct pci_dev; diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c deleted file mode 100644 index 7f8788a970ae18f1be9db6d7d2e863b0c82435bb..0000000000000000000000000000000000000000 --- a/drivers/pci/pcie/portdrv_pci.c +++ /dev/null @@ -1,252 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Purpose: PCI Express Port Bus Driver - * Author: Tom Nguyen - * - * Copyright (C) 2004 Intel - * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com) - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "../pci.h" -#include "portdrv.h" - -/* If this switch is set, PCIe port native services should not be enabled. */ -bool pcie_ports_disabled; - -/* - * If the user specified "pcie_ports=native", use the PCIe services regardless - * of whether the platform has given us permission. On ACPI systems, this - * means we ignore _OSC. - */ -bool pcie_ports_native; - -/* - * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe - * service even if the platform hasn't given us permission. - */ -bool pcie_ports_dpc_native; - -static int __init pcie_port_setup(char *str) -{ - if (!strncmp(str, "compat", 6)) - pcie_ports_disabled = true; - else if (!strncmp(str, "native", 6)) - pcie_ports_native = true; - else if (!strncmp(str, "dpc-native", 10)) - pcie_ports_dpc_native = true; - - return 1; -} -__setup("pcie_ports=", pcie_port_setup); - -/* global data */ - -#ifdef CONFIG_PM -static int pcie_port_runtime_suspend(struct device *dev) -{ - if (!to_pci_dev(dev)->bridge_d3) - return -EBUSY; - - return pcie_port_device_runtime_suspend(dev); -} - -static int pcie_port_runtime_idle(struct device *dev) -{ - /* - * Assume the PCI core has set bridge_d3 whenever it thinks the port - * should be good to go to D3. Everything else, including moving - * the port to D3, is handled by the PCI core. - */ - return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY; -} - -static const struct dev_pm_ops pcie_portdrv_pm_ops = { - .suspend = pcie_port_device_suspend, - .resume_noirq = pcie_port_device_resume_noirq, - .resume = pcie_port_device_resume, - .freeze = pcie_port_device_suspend, - .thaw = pcie_port_device_resume, - .poweroff = pcie_port_device_suspend, - .restore_noirq = pcie_port_device_resume_noirq, - .restore = pcie_port_device_resume, - .runtime_suspend = pcie_port_runtime_suspend, - .runtime_resume = pcie_port_device_runtime_resume, - .runtime_idle = pcie_port_runtime_idle, -}; - -#define PCIE_PORTDRV_PM_OPS (&pcie_portdrv_pm_ops) - -#else /* !PM */ - -#define PCIE_PORTDRV_PM_OPS NULL -#endif /* !PM */ - -/* - * pcie_portdrv_probe - Probe PCI-Express port devices - * @dev: PCI-Express port device being probed - * - * If detected invokes the pcie_port_device_register() method for - * this port device. - * - */ -static int pcie_portdrv_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - int type = pci_pcie_type(dev); - int status; - - if (!pci_is_pcie(dev) || - ((type != PCI_EXP_TYPE_ROOT_PORT) && - (type != PCI_EXP_TYPE_UPSTREAM) && - (type != PCI_EXP_TYPE_DOWNSTREAM) && - (type != PCI_EXP_TYPE_RC_EC))) - return -ENODEV; - - if (type == PCI_EXP_TYPE_RC_EC) - pcie_link_rcec(dev); - - status = pcie_port_device_register(dev); - if (status) - return status; - - pci_save_state(dev); - - dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE | - DPM_FLAG_SMART_SUSPEND); - - if (pci_bridge_d3_possible(dev)) { - /* - * Keep the port resumed 100ms to make sure things like - * config space accesses from userspace (lspci) will not - * cause the port to repeatedly suspend and resume. - */ - pm_runtime_set_autosuspend_delay(&dev->dev, 100); - pm_runtime_use_autosuspend(&dev->dev); - pm_runtime_mark_last_busy(&dev->dev); - pm_runtime_put_autosuspend(&dev->dev); - pm_runtime_allow(&dev->dev); - } - - return 0; -} - -static void pcie_portdrv_remove(struct pci_dev *dev) -{ - if (pci_bridge_d3_possible(dev)) { - pm_runtime_forbid(&dev->dev); - pm_runtime_get_noresume(&dev->dev); - pm_runtime_dont_use_autosuspend(&dev->dev); - } - - pcie_port_device_remove(dev); -} - -static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, - pci_channel_state_t error) -{ - if (error == pci_channel_io_frozen) - return PCI_ERS_RESULT_NEED_RESET; - return PCI_ERS_RESULT_CAN_RECOVER; -} - -static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev) -{ - size_t off = offsetof(struct pcie_port_service_driver, slot_reset); - device_for_each_child(&dev->dev, &off, pcie_port_device_iter); - - pci_restore_state(dev); - pci_save_state(dev); - return PCI_ERS_RESULT_RECOVERED; -} - -static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev) -{ - return PCI_ERS_RESULT_RECOVERED; -} - -/* - * LINUX Device Driver Model - */ -static const struct pci_device_id port_pci_ids[] = { - /* handle any PCI-Express port */ - { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) }, - /* subtractive decode PCI-to-PCI bridge, class type is 060401h */ - { PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) }, - /* handle any Root Complex Event Collector */ - { PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) }, - { }, -}; - -static const struct pci_error_handlers pcie_portdrv_err_handler = { - .error_detected = pcie_portdrv_error_detected, - .slot_reset = pcie_portdrv_slot_reset, - .mmio_enabled = pcie_portdrv_mmio_enabled, -}; - -static struct pci_driver pcie_portdriver = { - .name = "pcieport", - .id_table = &port_pci_ids[0], - - .probe = pcie_portdrv_probe, - .remove = pcie_portdrv_remove, - .shutdown = pcie_portdrv_remove, - - .err_handler = &pcie_portdrv_err_handler, - - .driver_managed_dma = true, - - .driver.pm = PCIE_PORTDRV_PM_OPS, -}; - -static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d) -{ - pr_notice("%s detected: will not use MSI for PCIe PME signaling\n", - d->ident); - pcie_pme_disable_msi(); - return 0; -} - -static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = { - /* - * Boxes that should not use MSI for PCIe PME signaling. - */ - { - .callback = dmi_pcie_pme_disable_msi, - .ident = "MSI Wind U-100", - .matches = { - DMI_MATCH(DMI_SYS_VENDOR, - "MICRO-STAR INTERNATIONAL CO., LTD"), - DMI_MATCH(DMI_PRODUCT_NAME, "U-100"), - }, - }, - {} -}; - -static void __init pcie_init_services(void) -{ - pcie_aer_init(); - pcie_pme_init(); - pcie_dpc_init(); - pcie_hp_init(); -} - -static int __init pcie_portdrv_init(void) -{ - if (pcie_ports_disabled) - return -EACCES; - - pcie_init_services(); - dmi_check_system(pcie_portdrv_dmi_table); - - return pci_register_driver(&pcie_portdriver); -} -device_initcall(pcie_portdrv_init); diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 334fd91f01e1e6e528105cae7230339066c8aea1..1779582fb5007cd1ba0b054e78c545d0d4f644c3 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -904,6 +904,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) bus->domain_nr = pci_bus_find_domain_nr(bus, parent); else bus->domain_nr = bridge->domain_nr; + if (bus->domain_nr < 0) { + err = bus->domain_nr; + goto free; + } #endif b = pci_find_bus(pci_domain_nr(bus), bridge->busnr); @@ -1028,6 +1032,9 @@ unregister: device_del(&bridge->dev); free: +#ifdef CONFIG_PCI_DOMAINS_GENERIC + pci_bus_release_domain_nr(bus, parent); +#endif kfree(bus); return err; } @@ -1889,9 +1896,6 @@ int pci_setup_device(struct pci_dev *dev) dev->broken_intx_masking = pci_intx_mask_broken(dev); - /* Clear errors left from system firmware */ - pci_write_config_word(dev, PCI_STATUS, 0xffff); - switch (dev->hdr_type) { /* header type */ case PCI_HEADER_TYPE_NORMAL: /* standard header */ if (class == PCI_CLASS_BRIDGE_PCI) diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c index 4c54c75050dc12179b53d82425699715d7ad3597..0145aef1b9301823e3a406611353b596036e8a77 100644 --- a/drivers/pci/remove.c +++ b/drivers/pci/remove.c @@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus) pci_remove_bus(bus); host_bridge->bus = NULL; +#ifdef CONFIG_PCI_DOMAINS_GENERIC + /* Release domain_nr if it was dynamically allocated */ + if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET) + pci_bus_release_domain_nr(bus, host_bridge->dev.parent); +#endif + /* remove the host bridge */ device_del(&host_bridge->dev); } diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c index 3852c18362f53ec2630a7f10c5a6d6e6baf59a9c..f6507efe2a5846d8180b1baf1047424fd65303bb 100644 --- a/drivers/perf/riscv_pmu_sbi.c +++ b/drivers/perf/riscv_pmu_sbi.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -47,6 +48,8 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = { * per_cpu in case of harts with different pmu counters */ static union sbi_pmu_ctr_info *pmu_ctr_list; +static bool riscv_pmu_use_irq; +static unsigned int riscv_pmu_irq_num; static unsigned int riscv_pmu_irq; struct sbi_pmu_event_data { @@ -580,7 +583,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS); event = cpu_hw_evt->events[fidx]; if (!event) { - csr_clear(CSR_SIP, SIP_LCOFIP); + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); return IRQ_NONE; } @@ -588,13 +591,13 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev) pmu_sbi_stop_hw_ctrs(pmu); /* Overflow status register should only be read after counter are stopped */ - overflow = csr_read(CSR_SSCOUNTOVF); + ALT_SBI_PMU_OVERFLOW(overflow); /* * Overflow interrupt pending bit should only be cleared after stopping * all the counters to avoid any race condition. */ - csr_clear(CSR_SIP, SIP_LCOFIP); + csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num)); /* No overflow bit is set */ if (!overflow) @@ -661,10 +664,10 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) /* Stop all the counters so that they can be enabled from perf */ pmu_sbi_stop_all(pmu); - if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + if (riscv_pmu_use_irq) { cpu_hw_evt->irq = riscv_pmu_irq; - csr_clear(CSR_IP, BIT(RV_IRQ_PMU)); - csr_set(CSR_IE, BIT(RV_IRQ_PMU)); + csr_clear(CSR_IP, BIT(riscv_pmu_irq_num)); + csr_set(CSR_IE, BIT(riscv_pmu_irq_num)); enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE); } @@ -673,9 +676,9 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node) static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node) { - if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + if (riscv_pmu_use_irq) { disable_percpu_irq(riscv_pmu_irq); - csr_clear(CSR_IE, BIT(RV_IRQ_PMU)); + csr_clear(CSR_IE, BIT(riscv_pmu_irq_num)); } /* Disable all counters access for user mode now */ @@ -691,7 +694,18 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde struct device_node *cpu, *child; struct irq_domain *domain = NULL; - if (!riscv_isa_extension_available(NULL, SSCOFPMF)) + if (riscv_isa_extension_available(NULL, SSCOFPMF)) { + riscv_pmu_irq_num = RV_IRQ_PMU; + riscv_pmu_use_irq = true; + } else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) && + riscv_cached_mvendorid(0) == THEAD_VENDOR_ID && + riscv_cached_marchid(0) == 0 && + riscv_cached_mimpid(0) == 0) { + riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU; + riscv_pmu_use_irq = true; + } + + if (!riscv_pmu_use_irq) return -EOPNOTSUPP; for_each_of_cpu_node(cpu) { @@ -713,7 +727,7 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde return -ENODEV; } - riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU); + riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num); if (!riscv_pmu_irq) { pr_err("Failed to map PMU interrupt for node\n"); return -ENODEV; diff --git a/drivers/s390/cio/vfio_ccw_chp.c b/drivers/s390/cio/vfio_ccw_chp.c index 13b26a1c79886bacd8742d1404588096bd224170..d3f3a611f95b41a11bde8ed2fe7bae18dad5f21b 100644 --- a/drivers/s390/cio/vfio_ccw_chp.c +++ b/drivers/s390/cio/vfio_ccw_chp.c @@ -16,6 +16,7 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private, char __user *buf, size_t count, loff_t *ppos) { + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS; loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK; struct ccw_schib_region *region; @@ -27,12 +28,12 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private, mutex_lock(&private->io_mutex); region = private->region[i].data; - if (cio_update_schib(private->sch)) { + if (cio_update_schib(sch)) { ret = -ENODEV; goto out; } - memcpy(region, &private->sch->schib, sizeof(*region)); + memcpy(region, &sch->schib, sizeof(*region)); if (copy_to_user(buf, (void *)region + pos, count)) { ret = -EFAULT; diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c index 7f5402fe857a2e6d2fbc5292b1fd88718c3d49d1..54aba7cceb33f98bc80c3f09bb7bceb95f2b8cdf 100644 --- a/drivers/s390/cio/vfio_ccw_drv.c +++ b/drivers/s390/cio/vfio_ccw_drv.c @@ -23,10 +23,10 @@ #include "vfio_ccw_private.h" struct workqueue_struct *vfio_ccw_work_q; -static struct kmem_cache *vfio_ccw_io_region; -static struct kmem_cache *vfio_ccw_cmd_region; -static struct kmem_cache *vfio_ccw_schib_region; -static struct kmem_cache *vfio_ccw_crw_region; +struct kmem_cache *vfio_ccw_io_region; +struct kmem_cache *vfio_ccw_cmd_region; +struct kmem_cache *vfio_ccw_schib_region; +struct kmem_cache *vfio_ccw_crw_region; debug_info_t *vfio_ccw_debug_msg_id; debug_info_t *vfio_ccw_debug_trace_id; @@ -36,10 +36,19 @@ debug_info_t *vfio_ccw_debug_trace_id; */ int vfio_ccw_sch_quiesce(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); DECLARE_COMPLETION_ONSTACK(completion); int iretry, ret = 0; + /* + * Probably an impossible situation, after being called through + * FSM callbacks. But in the event it did, register a warning + * and return as if things were fine. + */ + if (WARN_ON(!private)) + return 0; + iretry = 255; do { @@ -70,7 +79,7 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch) return ret; } -static void vfio_ccw_sch_io_todo(struct work_struct *work) +void vfio_ccw_sch_io_todo(struct work_struct *work) { struct vfio_ccw_private *private; struct irb *irb; @@ -106,7 +115,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work) eventfd_signal(private->io_trigger, 1); } -static void vfio_ccw_crw_todo(struct work_struct *work) +void vfio_ccw_crw_todo(struct work_struct *work) { struct vfio_ccw_private *private; @@ -121,90 +130,39 @@ static void vfio_ccw_crw_todo(struct work_struct *work) */ static void vfio_ccw_sch_irq(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); + + /* + * The subchannel should still be disabled at this point, + * so an interrupt would be quite surprising. As with an + * interrupt while the FSM is closed, let's attempt to + * disable the subchannel again. + */ + if (!private) { + VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: unexpected interrupt\n", + sch->schid.cssid, sch->schid.ssid, + sch->schid.sch_no); + + cio_disable_subchannel(sch); + return; + } inc_irq_stat(IRQIO_CIO); vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT); } -static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch) +static void vfio_ccw_free_parent(struct device *dev) { - struct vfio_ccw_private *private; + struct vfio_ccw_parent *parent = container_of(dev, struct vfio_ccw_parent, dev); - private = kzalloc(sizeof(*private), GFP_KERNEL); - if (!private) - return ERR_PTR(-ENOMEM); - - private->sch = sch; - mutex_init(&private->io_mutex); - private->state = VFIO_CCW_STATE_STANDBY; - INIT_LIST_HEAD(&private->crw); - INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); - INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); - - private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1), - GFP_KERNEL); - if (!private->cp.guest_cp) - goto out_free_private; - - private->io_region = kmem_cache_zalloc(vfio_ccw_io_region, - GFP_KERNEL | GFP_DMA); - if (!private->io_region) - goto out_free_cp; - - private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region, - GFP_KERNEL | GFP_DMA); - if (!private->cmd_region) - goto out_free_io; - - private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region, - GFP_KERNEL | GFP_DMA); - - if (!private->schib_region) - goto out_free_cmd; - - private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region, - GFP_KERNEL | GFP_DMA); - - if (!private->crw_region) - goto out_free_schib; - return private; - -out_free_schib: - kmem_cache_free(vfio_ccw_schib_region, private->schib_region); -out_free_cmd: - kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); -out_free_io: - kmem_cache_free(vfio_ccw_io_region, private->io_region); -out_free_cp: - kfree(private->cp.guest_cp); -out_free_private: - mutex_destroy(&private->io_mutex); - kfree(private); - return ERR_PTR(-ENOMEM); + kfree(parent); } -static void vfio_ccw_free_private(struct vfio_ccw_private *private) -{ - struct vfio_ccw_crw *crw, *temp; - - list_for_each_entry_safe(crw, temp, &private->crw, next) { - list_del(&crw->next); - kfree(crw); - } - - kmem_cache_free(vfio_ccw_crw_region, private->crw_region); - kmem_cache_free(vfio_ccw_schib_region, private->schib_region); - kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); - kmem_cache_free(vfio_ccw_io_region, private->io_region); - kfree(private->cp.guest_cp); - mutex_destroy(&private->io_mutex); - kfree(private); -} static int vfio_ccw_sch_probe(struct subchannel *sch) { struct pmcw *pmcw = &sch->schib.pmcw; - struct vfio_ccw_private *private; + struct vfio_ccw_parent *parent; int ret = -ENOMEM; if (pmcw->qf) { @@ -213,42 +171,50 @@ static int vfio_ccw_sch_probe(struct subchannel *sch) return -ENODEV; } - private = vfio_ccw_alloc_private(sch); - if (IS_ERR(private)) - return PTR_ERR(private); + parent = kzalloc(sizeof(*parent), GFP_KERNEL); + if (!parent) + return -ENOMEM; + + dev_set_name(&parent->dev, "parent"); + parent->dev.parent = &sch->dev; + parent->dev.release = &vfio_ccw_free_parent; + ret = device_register(&parent->dev); + if (ret) + goto out_free; - dev_set_drvdata(&sch->dev, private); + dev_set_drvdata(&sch->dev, parent); - private->mdev_type.sysfs_name = "io"; - private->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)"; - private->mdev_types[0] = &private->mdev_type; - ret = mdev_register_parent(&private->parent, &sch->dev, + parent->mdev_type.sysfs_name = "io"; + parent->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)"; + parent->mdev_types[0] = &parent->mdev_type; + ret = mdev_register_parent(&parent->parent, &sch->dev, &vfio_ccw_mdev_driver, - private->mdev_types, 1); + parent->mdev_types, 1); if (ret) - goto out_free; + goto out_unreg; VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no); return 0; +out_unreg: + device_del(&parent->dev); out_free: + put_device(&parent->dev); dev_set_drvdata(&sch->dev, NULL); - vfio_ccw_free_private(private); return ret; } static void vfio_ccw_sch_remove(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); - mdev_unregister_parent(&private->parent); + mdev_unregister_parent(&parent->parent); + device_unregister(&parent->dev); dev_set_drvdata(&sch->dev, NULL); - vfio_ccw_free_private(private); - VFIO_CCW_MSG_EVENT(4, "unbound from subchannel %x.%x.%04x\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no); @@ -256,7 +222,11 @@ static void vfio_ccw_sch_remove(struct subchannel *sch) static void vfio_ccw_sch_shutdown(struct subchannel *sch) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); + + if (WARN_ON(!private)) + return; vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE); vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); @@ -274,7 +244,8 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch) */ static int vfio_ccw_sch_event(struct subchannel *sch, int process) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); unsigned long flags; int rc = -EAGAIN; @@ -287,8 +258,10 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process) rc = 0; - if (cio_update_schib(sch)) - vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + if (cio_update_schib(sch)) { + if (private) + vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER); + } out_unlock: spin_unlock_irqrestore(sch->lock, flags); @@ -326,14 +299,15 @@ static void vfio_ccw_queue_crw(struct vfio_ccw_private *private, static int vfio_ccw_chp_event(struct subchannel *sch, struct chp_link *link, int event) { - struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); int mask = chp_ssd_get_mask(&sch->ssd_info, link); int retry = 255; if (!private || !mask) return 0; - trace_vfio_ccw_chp_event(private->sch->schid, mask, event); + trace_vfio_ccw_chp_event(sch->schid, mask, event); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n", sch->schid.cssid, sch->schid.ssid, sch->schid.sch_no, diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c index 0a5e8b4a674342b31d784b0ed73c777d5d40c51b..2784a4e4d2bef3c9383ee10eb66f8ded33cb00f9 100644 --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@ -18,15 +18,13 @@ static int fsm_io_helper(struct vfio_ccw_private *private) { - struct subchannel *sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); union orb *orb; int ccode; __u8 lpm; unsigned long flags; int ret; - sch = private->sch; - spin_lock_irqsave(sch->lock, flags); orb = cp_get_orb(&private->cp, (u32)virt_to_phys(sch), sch->lpm); @@ -80,13 +78,11 @@ out: static int fsm_do_halt(struct vfio_ccw_private *private) { - struct subchannel *sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); unsigned long flags; int ccode; int ret; - sch = private->sch; - spin_lock_irqsave(sch->lock, flags); VFIO_CCW_TRACE_EVENT(2, "haltIO"); @@ -121,13 +117,11 @@ static int fsm_do_halt(struct vfio_ccw_private *private) static int fsm_do_clear(struct vfio_ccw_private *private) { - struct subchannel *sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); unsigned long flags; int ccode; int ret; - sch = private->sch; - spin_lock_irqsave(sch->lock, flags); VFIO_CCW_TRACE_EVENT(2, "clearIO"); @@ -160,7 +154,7 @@ static int fsm_do_clear(struct vfio_ccw_private *private) static void fsm_notoper(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: notoper event %x state %x\n", sch->schid.cssid, @@ -228,7 +222,7 @@ static void fsm_async_retry(struct vfio_ccw_private *private, static void fsm_disabled_irq(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); /* * An interrupt in a disabled state means a previous disable was not @@ -238,7 +232,9 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private, } inline struct subchannel_id get_schid(struct vfio_ccw_private *p) { - return p->sch->schid; + struct subchannel *sch = to_subchannel(p->vdev.dev->parent); + + return sch->schid; } /* @@ -360,10 +356,11 @@ static void fsm_async_request(struct vfio_ccw_private *private, static void fsm_irq(struct vfio_ccw_private *private, enum vfio_ccw_event event) { + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); struct irb *irb = this_cpu_ptr(&cio_irb); VFIO_CCW_TRACE_EVENT(6, "IRQ"); - VFIO_CCW_TRACE_EVENT(6, dev_name(&private->sch->dev)); + VFIO_CCW_TRACE_EVENT(6, dev_name(&sch->dev)); memcpy(&private->irb, irb, sizeof(*irb)); @@ -376,7 +373,7 @@ static void fsm_irq(struct vfio_ccw_private *private, static void fsm_open(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); int ret; spin_lock_irq(sch->lock); @@ -397,7 +394,7 @@ err_unlock: static void fsm_close(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - struct subchannel *sch = private->sch; + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); int ret; spin_lock_irq(sch->lock); diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c index 6ae4d012d800845bf5cad2b68abefd5bbc4e2164..5b53b94f13c741cc089c98373eaecc9a30e74f34 100644 --- a/drivers/s390/cio/vfio_ccw_ops.c +++ b/drivers/s390/cio/vfio_ccw_ops.c @@ -49,26 +49,70 @@ static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev) struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); - init_completion(&private->release_comp); + mutex_init(&private->io_mutex); + private->state = VFIO_CCW_STATE_STANDBY; + INIT_LIST_HEAD(&private->crw); + INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo); + INIT_WORK(&private->crw_work, vfio_ccw_crw_todo); + + private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1), + GFP_KERNEL); + if (!private->cp.guest_cp) + goto out_free_private; + + private->io_region = kmem_cache_zalloc(vfio_ccw_io_region, + GFP_KERNEL | GFP_DMA); + if (!private->io_region) + goto out_free_cp; + + private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region, + GFP_KERNEL | GFP_DMA); + if (!private->cmd_region) + goto out_free_io; + + private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region, + GFP_KERNEL | GFP_DMA); + if (!private->schib_region) + goto out_free_cmd; + + private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region, + GFP_KERNEL | GFP_DMA); + if (!private->crw_region) + goto out_free_schib; + return 0; + +out_free_schib: + kmem_cache_free(vfio_ccw_schib_region, private->schib_region); +out_free_cmd: + kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); +out_free_io: + kmem_cache_free(vfio_ccw_io_region, private->io_region); +out_free_cp: + kfree(private->cp.guest_cp); +out_free_private: + mutex_destroy(&private->io_mutex); + return -ENOMEM; } static int vfio_ccw_mdev_probe(struct mdev_device *mdev) { - struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); + struct subchannel *sch = to_subchannel(mdev->dev.parent); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private; int ret; - if (private->state == VFIO_CCW_STATE_NOT_OPER) - return -ENODEV; + private = vfio_alloc_device(vfio_ccw_private, vdev, &mdev->dev, + &vfio_ccw_dev_ops); + if (IS_ERR(private)) + return PTR_ERR(private); - ret = vfio_init_device(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops); - if (ret) - return ret; + dev_set_drvdata(&parent->dev, private); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n", - private->sch->schid.cssid, - private->sch->schid.ssid, - private->sch->schid.sch_no); + sch->schid.cssid, + sch->schid.ssid, + sch->schid.sch_no); ret = vfio_register_emulated_iommu_dev(&private->vdev); if (ret) @@ -77,6 +121,7 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev) return 0; err_put_vdev: + dev_set_drvdata(&parent->dev, NULL); vfio_put_device(&private->vdev); return ret; } @@ -85,40 +130,36 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev) { struct vfio_ccw_private *private = container_of(vdev, struct vfio_ccw_private, vdev); + struct vfio_ccw_crw *crw, *temp; - /* - * We cannot free vfio_ccw_private here because it includes - * parent info which must be free'ed by css driver. - * - * Use a workaround by memset'ing the core device part and - * then notifying the remove path that all active references - * to this device have been released. - */ - memset(vdev, 0, sizeof(*vdev)); - complete(&private->release_comp); + list_for_each_entry_safe(crw, temp, &private->crw, next) { + list_del(&crw->next); + kfree(crw); + } + + kmem_cache_free(vfio_ccw_crw_region, private->crw_region); + kmem_cache_free(vfio_ccw_schib_region, private->schib_region); + kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region); + kmem_cache_free(vfio_ccw_io_region, private->io_region); + kfree(private->cp.guest_cp); + mutex_destroy(&private->io_mutex); } static void vfio_ccw_mdev_remove(struct mdev_device *mdev) { - struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent); + struct subchannel *sch = to_subchannel(mdev->dev.parent); + struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev); + struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev); VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n", - private->sch->schid.cssid, - private->sch->schid.ssid, - private->sch->schid.sch_no); + sch->schid.cssid, + sch->schid.ssid, + sch->schid.sch_no); vfio_unregister_group_dev(&private->vdev); + dev_set_drvdata(&parent->dev, NULL); vfio_put_device(&private->vdev); - /* - * Wait for all active references on mdev are released so it - * is safe to defer kfree() to a later point. - * - * TODO: the clean fix is to split parent/mdev info from ccw - * private structure so each can be managed in its own life - * cycle. - */ - wait_for_completion(&private->release_comp); } static int vfio_ccw_mdev_open_device(struct vfio_device *vdev) @@ -588,6 +629,9 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = { .ioctl = vfio_ccw_mdev_ioctl, .request = vfio_ccw_mdev_request, .dma_unmap = vfio_ccw_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; struct mdev_driver vfio_ccw_mdev_driver = { diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h index bd5fb81456af854c0a87a2b27262e72f66a3d04d..b441ae6700fd235e6bb2044b6cda33b217c5dfe6 100644 --- a/drivers/s390/cio/vfio_ccw_private.h +++ b/drivers/s390/cio/vfio_ccw_private.h @@ -67,10 +67,24 @@ struct vfio_ccw_crw { struct crw crw; }; +/** + * struct vfio_ccw_parent + * + * @dev: embedded device struct + * @parent: parent data structures for mdevs created + * @mdev_type(s): identifying information for mdevs created + */ +struct vfio_ccw_parent { + struct device dev; + + struct mdev_parent parent; + struct mdev_type mdev_type; + struct mdev_type *mdev_types[1]; +}; + /** * struct vfio_ccw_private * @vdev: Embedded VFIO device - * @sch: pointer to the subchannel * @state: internal state of the device * @completion: synchronization helper of the I/O completion * @io_region: MMIO region to input/output I/O arguments/results @@ -88,12 +102,9 @@ struct vfio_ccw_crw { * @req_trigger: eventfd ctx for signaling userspace to return device * @io_work: work for deferral process of I/O handling * @crw_work: work for deferral process of CRW handling - * @release_comp: synchronization helper for vfio device release - * @parent: parent data structures for mdevs created */ struct vfio_ccw_private { struct vfio_device vdev; - struct subchannel *sch; int state; struct completion *completion; struct ccw_io_region *io_region; @@ -114,15 +125,11 @@ struct vfio_ccw_private { struct eventfd_ctx *req_trigger; struct work_struct io_work; struct work_struct crw_work; - - struct completion release_comp; - - struct mdev_parent parent; - struct mdev_type mdev_type; - struct mdev_type *mdev_types[1]; } __aligned(8); int vfio_ccw_sch_quiesce(struct subchannel *sch); +void vfio_ccw_sch_io_todo(struct work_struct *work); +void vfio_ccw_crw_todo(struct work_struct *work); extern struct mdev_driver vfio_ccw_mdev_driver; @@ -162,12 +169,18 @@ extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS]; static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private, enum vfio_ccw_event event) { - trace_vfio_ccw_fsm_event(private->sch->schid, private->state, event); + struct subchannel *sch = to_subchannel(private->vdev.dev->parent); + + if (sch) + trace_vfio_ccw_fsm_event(sch->schid, private->state, event); vfio_ccw_jumptable[private->state][event](private, event); } extern struct workqueue_struct *vfio_ccw_work_q; - +extern struct kmem_cache *vfio_ccw_io_region; +extern struct kmem_cache *vfio_ccw_cmd_region; +extern struct kmem_cache *vfio_ccw_schib_region; +extern struct kmem_cache *vfio_ccw_crw_region; /* s390 debug feature, similar to base cio */ extern debug_info_t *vfio_ccw_debug_msg_id; diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index f43cfeabd2cc8b709f6d3e75b4ae44170195cf27..997b524bdd2b5bffbd01782e970e318c6d40c013 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -122,7 +122,7 @@ static int vfio_ap_matrix_dev_create(void) return 0; matrix_drv_err: - device_unregister(&matrix_dev->device); + device_del(&matrix_dev->device); matrix_reg_err: put_device(&matrix_dev->device); matrix_alloc_err: diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index 0b4cc8c597ae67845c8d01c434e93ec05c3a5195..9c01957e56b3f17f494dcb8766646a404eb35280 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -429,7 +429,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q, aqic_gisa.isc = nisc; aqic_gisa.ir = 1; - aqic_gisa.gisa = (uint64_t)gisa >> 4; + aqic_gisa.gisa = virt_to_phys(gisa) >> 4; status = ap_aqic(q->apqn, aqic_gisa, h_nib); switch (status.response_code) { @@ -765,11 +765,6 @@ static void vfio_ap_mdev_unlink_fr_queues(struct ap_matrix_mdev *matrix_mdev) } } -static void vfio_ap_mdev_release_dev(struct vfio_device *vdev) -{ - vfio_free_device(vdev); -} - static void vfio_ap_mdev_remove(struct mdev_device *mdev) { struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev); @@ -1535,13 +1530,29 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, return 0; } +static void unmap_iova(struct ap_matrix_mdev *matrix_mdev, u64 iova, u64 length) +{ + struct ap_queue_table *qtable = &matrix_mdev->qtable; + struct vfio_ap_queue *q; + int loop_cursor; + + hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) { + if (q->saved_iova >= iova && q->saved_iova < iova + length) + vfio_ap_irq_disable(q); + } +} + static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova, u64 length) { struct ap_matrix_mdev *matrix_mdev = container_of(vdev, struct ap_matrix_mdev, vdev); - vfio_unpin_pages(&matrix_mdev->vdev, iova, 1); + mutex_lock(&matrix_dev->mdevs_lock); + + unmap_iova(matrix_mdev, iova, length); + + mutex_unlock(&matrix_dev->mdevs_lock); } /** @@ -1784,11 +1795,13 @@ static const struct attribute_group vfio_queue_attr_group = { static const struct vfio_device_ops vfio_ap_matrix_dev_ops = { .init = vfio_ap_mdev_init_dev, - .release = vfio_ap_mdev_release_dev, .open_device = vfio_ap_mdev_open_device, .close_device = vfio_ap_mdev_close_device, .ioctl = vfio_ap_mdev_ioctl, .dma_unmap = vfio_ap_mdev_dma_unmap, + .bind_iommufd = vfio_iommufd_emulated_bind, + .unbind_iommufd = vfio_iommufd_emulated_unbind, + .attach_ioas = vfio_iommufd_emulated_attach_ioas, }; static struct mdev_driver vfio_ap_matrix_driver = { diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c index f94b43ce9a65836690ffe3a57688e5d340d63a58..4bf36e53fe3e4af6cd41e690dd77dcf5613173ee 100644 --- a/drivers/s390/crypto/zcrypt_api.c +++ b/drivers/s390/crypto/zcrypt_api.c @@ -53,10 +53,6 @@ MODULE_LICENSE("GPL"); EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_req); EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_rep); -static int zcrypt_hwrng_seed = 1; -module_param_named(hwrng_seed, zcrypt_hwrng_seed, int, 0440); -MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on)."); - DEFINE_SPINLOCK(zcrypt_list_lock); LIST_HEAD(zcrypt_card_list); @@ -2063,8 +2059,6 @@ int zcrypt_rng_device_add(void) goto out; } zcrypt_rng_buffer_index = 0; - if (!zcrypt_hwrng_seed) - zcrypt_rng_dev.quality = 0; rc = hwrng_register(&zcrypt_rng_dev); if (rc) goto out_free; diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c index a0ceeede450f1d580175f272e30cf37b2c1036f8..f0475b93ca730624a3fb0a383f3ee58c366bbb5f 100644 --- a/drivers/soc/qcom/qcom-geni-se.c +++ b/drivers/soc/qcom/qcom-geni-se.c @@ -81,19 +81,31 @@ */ #define MAX_CLK_PERF_LEVEL 32 -#define NUM_AHB_CLKS 2 +#define MAX_CLKS 2 /** * struct geni_wrapper - Data structure to represent the QUP Wrapper Core * @dev: Device pointer of the QUP wrapper core * @base: Base address of this instance of QUP wrapper core - * @ahb_clks: Handle to the primary & secondary AHB clocks + * @clks: Handle to the primary & optional secondary AHB clocks + * @num_clks: Count of clocks * @to_core: Core ICC path */ struct geni_wrapper { struct device *dev; void __iomem *base; - struct clk_bulk_data ahb_clks[NUM_AHB_CLKS]; + struct clk_bulk_data clks[MAX_CLKS]; + unsigned int num_clks; +}; + +/** + * struct geni_se_desc - Data structure to represent the QUP Wrapper resources + * @clks: Name of the primary & optional secondary AHB clocks + * @num_clks: Count of clock names + */ +struct geni_se_desc { + unsigned int num_clks; + const char * const *clks; }; static const char * const icc_path_names[] = {"qup-core", "qup-config", @@ -496,8 +508,7 @@ static void geni_se_clks_off(struct geni_se *se) struct geni_wrapper *wrapper = se->wrapper; clk_disable_unprepare(se->clk); - clk_bulk_disable_unprepare(ARRAY_SIZE(wrapper->ahb_clks), - wrapper->ahb_clks); + clk_bulk_disable_unprepare(wrapper->num_clks, wrapper->clks); } /** @@ -528,15 +539,13 @@ static int geni_se_clks_on(struct geni_se *se) int ret; struct geni_wrapper *wrapper = se->wrapper; - ret = clk_bulk_prepare_enable(ARRAY_SIZE(wrapper->ahb_clks), - wrapper->ahb_clks); + ret = clk_bulk_prepare_enable(wrapper->num_clks, wrapper->clks); if (ret) return ret; ret = clk_prepare_enable(se->clk); if (ret) - clk_bulk_disable_unprepare(ARRAY_SIZE(wrapper->ahb_clks), - wrapper->ahb_clks); + clk_bulk_disable_unprepare(wrapper->num_clks, wrapper->clks); return ret; } @@ -887,11 +896,33 @@ static int geni_se_probe(struct platform_device *pdev) return PTR_ERR(wrapper->base); if (!has_acpi_companion(&pdev->dev)) { - wrapper->ahb_clks[0].id = "m-ahb"; - wrapper->ahb_clks[1].id = "s-ahb"; - ret = devm_clk_bulk_get(dev, NUM_AHB_CLKS, wrapper->ahb_clks); + const struct geni_se_desc *desc; + int i; + + desc = device_get_match_data(&pdev->dev); + if (!desc) + return -EINVAL; + + wrapper->num_clks = min_t(unsigned int, desc->num_clks, MAX_CLKS); + + for (i = 0; i < wrapper->num_clks; ++i) + wrapper->clks[i].id = desc->clks[i]; + + ret = of_count_phandle_with_args(dev->of_node, "clocks", "#clock-cells"); + if (ret < 0) { + dev_err(dev, "invalid clocks property at %pOF\n", dev->of_node); + return ret; + } + + if (ret < wrapper->num_clks) { + dev_err(dev, "invalid clocks count at %pOF, expected %d entries\n", + dev->of_node, wrapper->num_clks); + return -EINVAL; + } + + ret = devm_clk_bulk_get(dev, wrapper->num_clks, wrapper->clks); if (ret) { - dev_err(dev, "Err getting AHB clks %d\n", ret); + dev_err(dev, "Err getting clks %d\n", ret); return ret; } } @@ -901,8 +932,28 @@ static int geni_se_probe(struct platform_device *pdev) return devm_of_platform_populate(dev); } +static const char * const qup_clks[] = { + "m-ahb", + "s-ahb", +}; + +static const struct geni_se_desc qup_desc = { + .clks = qup_clks, + .num_clks = ARRAY_SIZE(qup_clks), +}; + +static const char * const i2c_master_hub_clks[] = { + "s-ahb", +}; + +static const struct geni_se_desc i2c_master_hub_desc = { + .clks = i2c_master_hub_clks, + .num_clks = ARRAY_SIZE(i2c_master_hub_clks), +}; + static const struct of_device_id geni_se_dt_match[] = { - { .compatible = "qcom,geni-se-qup", }, + { .compatible = "qcom,geni-se-qup", .data = &qup_desc }, + { .compatible = "qcom,geni-se-i2c-master-hub", .data = &i2c_master_hub_desc }, {} }; MODULE_DEVICE_TABLE(of, geni_se_dt_match); diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c index 2d1aeaba38a889b0a62025ca7b089aa430f40cbf..d5d4eae16771b358346c55b9c17d13f052cbaf0a 100644 --- a/drivers/thermal/gov_power_allocator.c +++ b/drivers/thermal/gov_power_allocator.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) "Power allocator: " fmt -#include #include #include diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c index e2c2673025a7ac45754574d2d824fa84fed1c37d..d247b48696cb63dc53373ddc27c397b67777d8ff 100644 --- a/drivers/thermal/imx8mm_thermal.c +++ b/drivers/thermal/imx8mm_thermal.c @@ -10,21 +10,40 @@ #include #include #include +#include #include #include #include +#include #include #include "thermal_core.h" +#include "thermal_hwmon.h" #define TER 0x0 /* TMU enable */ #define TPS 0x4 #define TRITSR 0x20 /* TMU immediate temp */ +/* TMU calibration data registers */ +#define TASR 0x28 +#define TASR_BUF_SLOPE_MASK GENMASK(19, 16) +#define TASR_BUF_VREF_MASK GENMASK(4, 0) /* TMU_V1 */ +#define TASR_BUF_VERF_SEL_MASK GENMASK(1, 0) /* TMU_V2 */ +#define TCALIV(n) (0x30 + ((n) * 4)) +#define TCALIV_EN BIT(31) +#define TCALIV_HR_MASK GENMASK(23, 16) /* TMU_V1 */ +#define TCALIV_RT_MASK GENMASK(7, 0) /* TMU_V1 */ +#define TCALIV_SNSR105C_MASK GENMASK(27, 16) /* TMU_V2 */ +#define TCALIV_SNSR25C_MASK GENMASK(11, 0) /* TMU_V2 */ +#define TRIM 0x3c +#define TRIM_BJT_CUR_MASK GENMASK(23, 20) +#define TRIM_BGR_MASK GENMASK(31, 28) +#define TRIM_VLSB_MASK GENMASK(15, 12) +#define TRIM_EN_CH BIT(7) #define TER_ADC_PD BIT(30) #define TER_EN BIT(31) -#define TRITSR_TEMP0_VAL_MASK 0xff -#define TRITSR_TEMP1_VAL_MASK 0xff0000 +#define TRITSR_TEMP0_VAL_MASK GENMASK(7, 0) +#define TRITSR_TEMP1_VAL_MASK GENMASK(23, 16) #define PROBE_SEL_ALL GENMASK(31, 30) @@ -32,6 +51,25 @@ #define SIGN_BIT BIT(7) #define TEMP_VAL_MASK GENMASK(6, 0) +/* TMU OCOTP calibration data bitfields */ +#define ANA0_EN BIT(25) +#define ANA0_BUF_VREF_MASK GENMASK(24, 20) +#define ANA0_BUF_SLOPE_MASK GENMASK(19, 16) +#define ANA0_HR_MASK GENMASK(15, 8) +#define ANA0_RT_MASK GENMASK(7, 0) +#define TRIM2_VLSB_MASK GENMASK(23, 20) +#define TRIM2_BGR_MASK GENMASK(19, 16) +#define TRIM2_BJT_CUR_MASK GENMASK(15, 12) +#define TRIM2_BUF_SLOP_SEL_MASK GENMASK(11, 8) +#define TRIM2_BUF_VERF_SEL_MASK GENMASK(7, 6) +#define TRIM3_TCA25_0_LSB_MASK GENMASK(31, 28) +#define TRIM3_TCA40_0_MASK GENMASK(27, 16) +#define TRIM4_TCA40_1_MASK GENMASK(31, 20) +#define TRIM4_TCA105_0_MASK GENMASK(19, 8) +#define TRIM4_TCA25_0_MSB_MASK GENMASK(7, 0) +#define TRIM5_TCA105_1_MASK GENMASK(23, 12) +#define TRIM5_TCA25_1_MASK GENMASK(11, 0) + #define VER1_TEMP_LOW_LIMIT 10000 #define VER2_TEMP_LOW_LIMIT -40000 #define VER2_TEMP_HIGH_LIMIT 125000 @@ -65,8 +103,14 @@ static int imx8mm_tmu_get_temp(void *data, int *temp) u32 val; val = readl_relaxed(tmu->base + TRITSR) & TRITSR_TEMP0_VAL_MASK; + + /* + * Do not validate against the V bit (bit 31) due to errata + * ERR051272: TMU: Bit 31 of registers TMU_TSCR/TMU_TRITSR/TMU_TRATSR invalid + */ + *temp = val * 1000; - if (*temp < VER1_TEMP_LOW_LIMIT) + if (*temp < VER1_TEMP_LOW_LIMIT || *temp > VER2_TEMP_HIGH_LIMIT) return -EAGAIN; return 0; @@ -128,6 +172,129 @@ static void imx8mm_tmu_probe_sel_all(struct imx8mm_tmu *tmu) writel_relaxed(val, tmu->base + TPS); } +static int imx8mm_tmu_probe_set_calib_v1(struct platform_device *pdev, + struct imx8mm_tmu *tmu) +{ + struct device *dev = &pdev->dev; + u32 ana0; + int ret; + + ret = nvmem_cell_read_u32(&pdev->dev, "calib", &ana0); + if (ret) { + dev_warn(dev, "Failed to read OCOTP nvmem cell (%d).\n", ret); + return ret; + } + + writel(FIELD_PREP(TASR_BUF_VREF_MASK, + FIELD_GET(ANA0_BUF_VREF_MASK, ana0)) | + FIELD_PREP(TASR_BUF_SLOPE_MASK, + FIELD_GET(ANA0_BUF_SLOPE_MASK, ana0)), + tmu->base + TASR); + + writel(FIELD_PREP(TCALIV_RT_MASK, FIELD_GET(ANA0_RT_MASK, ana0)) | + FIELD_PREP(TCALIV_HR_MASK, FIELD_GET(ANA0_HR_MASK, ana0)) | + ((ana0 & ANA0_EN) ? TCALIV_EN : 0), + tmu->base + TCALIV(0)); + + return 0; +} + +static int imx8mm_tmu_probe_set_calib_v2(struct platform_device *pdev, + struct imx8mm_tmu *tmu) +{ + struct device *dev = &pdev->dev; + struct nvmem_cell *cell; + u32 trim[4] = { 0 }; + size_t len; + void *buf; + + cell = nvmem_cell_get(dev, "calib"); + if (IS_ERR(cell)) + return PTR_ERR(cell); + + buf = nvmem_cell_read(cell, &len); + nvmem_cell_put(cell); + + if (IS_ERR(buf)) + return PTR_ERR(buf); + + memcpy(trim, buf, min(len, sizeof(trim))); + kfree(buf); + + if (len != 16) { + dev_err(dev, + "OCOTP nvmem cell length is %zu, must be 16.\n", len); + return -EINVAL; + } + + /* Blank sample hardware */ + if (!trim[0] && !trim[1] && !trim[2] && !trim[3]) { + /* Use a default 25C binary codes */ + writel(FIELD_PREP(TCALIV_SNSR25C_MASK, 0x63c), + tmu->base + TCALIV(0)); + writel(FIELD_PREP(TCALIV_SNSR25C_MASK, 0x63c), + tmu->base + TCALIV(1)); + return 0; + } + + writel(FIELD_PREP(TASR_BUF_VERF_SEL_MASK, + FIELD_GET(TRIM2_BUF_VERF_SEL_MASK, trim[0])) | + FIELD_PREP(TASR_BUF_SLOPE_MASK, + FIELD_GET(TRIM2_BUF_SLOP_SEL_MASK, trim[0])), + tmu->base + TASR); + + writel(FIELD_PREP(TRIM_BJT_CUR_MASK, + FIELD_GET(TRIM2_BJT_CUR_MASK, trim[0])) | + FIELD_PREP(TRIM_BGR_MASK, FIELD_GET(TRIM2_BGR_MASK, trim[0])) | + FIELD_PREP(TRIM_VLSB_MASK, FIELD_GET(TRIM2_VLSB_MASK, trim[0])) | + TRIM_EN_CH, + tmu->base + TRIM); + + writel(FIELD_PREP(TCALIV_SNSR25C_MASK, + FIELD_GET(TRIM3_TCA25_0_LSB_MASK, trim[1]) | + (FIELD_GET(TRIM4_TCA25_0_MSB_MASK, trim[2]) << 4)) | + FIELD_PREP(TCALIV_SNSR105C_MASK, + FIELD_GET(TRIM4_TCA105_0_MASK, trim[2])), + tmu->base + TCALIV(0)); + + writel(FIELD_PREP(TCALIV_SNSR25C_MASK, + FIELD_GET(TRIM5_TCA25_1_MASK, trim[3])) | + FIELD_PREP(TCALIV_SNSR105C_MASK, + FIELD_GET(TRIM5_TCA105_1_MASK, trim[3])), + tmu->base + TCALIV(1)); + + writel(FIELD_PREP(TCALIV_SNSR25C_MASK, + FIELD_GET(TRIM3_TCA40_0_MASK, trim[1])) | + FIELD_PREP(TCALIV_SNSR105C_MASK, + FIELD_GET(TRIM4_TCA40_1_MASK, trim[2])), + tmu->base + TCALIV(2)); + + return 0; +} + +static int imx8mm_tmu_probe_set_calib(struct platform_device *pdev, + struct imx8mm_tmu *tmu) +{ + struct device *dev = &pdev->dev; + + /* + * Lack of calibration data OCOTP reference is not considered + * fatal to retain compatibility with old DTs. It is however + * strongly recommended to update such old DTs to get correct + * temperature compensation values for each SoC. + */ + if (!of_find_property(pdev->dev.of_node, "nvmem-cells", NULL)) { + dev_warn(dev, + "No OCOTP nvmem reference found, SoC-specific calibration not loaded. Please update your DT.\n"); + return 0; + } + + if (tmu->socdata->version == TMU_VER1) + return imx8mm_tmu_probe_set_calib_v1(pdev, tmu); + + return imx8mm_tmu_probe_set_calib_v2(pdev, tmu); +} + static int imx8mm_tmu_probe(struct platform_device *pdev) { const struct thermal_soc_data *data; @@ -176,10 +343,17 @@ static int imx8mm_tmu_probe(struct platform_device *pdev) goto disable_clk; } tmu->sensors[i].hw_id = i; + + if (devm_thermal_add_hwmon_sysfs(tmu->sensors[i].tzd)) + dev_warn(&pdev->dev, "failed to add hwmon sysfs attributes\n"); } platform_set_drvdata(pdev, tmu); + ret = imx8mm_tmu_probe_set_calib(pdev, tmu); + if (ret) + goto disable_clk; + /* enable all the probes for V2 TMU */ if (tmu->socdata->version == TMU_VER2) imx8mm_tmu_probe_sel_all(tmu); diff --git a/drivers/thermal/imx_sc_thermal.c b/drivers/thermal/imx_sc_thermal.c index 5d92b70a5d53a3f1be716f6adcb16aff3323a618..4df925e3a80bd72951844e697319df9bb03d0512 100644 --- a/drivers/thermal/imx_sc_thermal.c +++ b/drivers/thermal/imx_sc_thermal.c @@ -127,11 +127,6 @@ static int imx_sc_thermal_probe(struct platform_device *pdev) return 0; } -static int imx_sc_thermal_remove(struct platform_device *pdev) -{ - return 0; -} - static int imx_sc_sensors[] = { IMX_SC_R_SYSTEM, IMX_SC_R_PMIC_0, -1 }; static const struct of_device_id imx_sc_thermal_table[] = { @@ -142,7 +137,6 @@ MODULE_DEVICE_TABLE(of, imx_sc_thermal_table); static struct platform_driver imx_sc_thermal_driver = { .probe = imx_sc_thermal_probe, - .remove = imx_sc_thermal_remove, .driver = { .name = "imx-sc-thermal", .of_match_table = imx_sc_thermal_table, diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c index 4bb7fddaa14378273dd34b3f6e1c12ba32a02a07..2e22bb82b7389e740c70dc66de24351372861bf0 100644 --- a/drivers/thermal/intel/therm_throt.c +++ b/drivers/thermal/intel/therm_throt.c @@ -194,7 +194,7 @@ static const struct attribute_group thermal_attr_group = { #define THERM_STATUS_PROCHOT_LOG BIT(1) #define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15)) -#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(26)) +#define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11)) /* * Clear the bits in package thermal status register for bit = 1 @@ -211,6 +211,9 @@ void thermal_clear_package_intr_status(int level, u64 bit_mask) } else { msr = MSR_IA32_PACKAGE_THERM_STATUS; msr_val = THERM_STATUS_CLEAR_PKG_MASK; + if (boot_cpu_has(X86_FEATURE_HFI)) + msr_val |= BIT(26); + } msr_val &= ~bit_mask; diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c index 16b6bcf1bf4fa171b9e7a0aa413a943f0cef4394..031ea1091909a78549dfb2191b9a739eef3f7248 100644 --- a/drivers/thermal/k3_j72xx_bandgap.c +++ b/drivers/thermal/k3_j72xx_bandgap.c @@ -177,7 +177,6 @@ struct k3_j72xx_bandgap { struct device *dev; void __iomem *base; void __iomem *cfg2_base; - void __iomem *fuse_base; struct k3_thermal_data *ts_data[K3_VTM_MAX_NUM_TS]; }; @@ -249,14 +248,7 @@ static inline int k3_bgp_read_temp(struct k3_thermal_data *devdata, /* Get temperature callback function for thermal zone */ static int k3_thermal_get_temp(struct thermal_zone_device *tz, int *temp) { - struct k3_thermal_data *data = tz->devdata; - int ret = 0; - - ret = k3_bgp_read_temp(data, temp); - if (ret) - return ret; - - return ret; + return k3_bgp_read_temp(tz->devdata, temp); } static const struct thermal_zone_device_ops k3_of_thermal_ops = { @@ -283,7 +275,7 @@ static int k3_j72xx_bandgap_temp_to_adc_code(int temp) } static void get_efuse_values(int id, struct k3_thermal_data *data, int *err, - struct k3_j72xx_bandgap *bgp) + void __iomem *fuse_base) { int i, tmp, pow; int ct_offsets[5][K3_VTM_CORRECTION_TEMP_CNT] = { @@ -305,16 +297,16 @@ static void get_efuse_values(int id, struct k3_thermal_data *data, int *err, /* Extract the offset value using bit-mask */ if (ct_offsets[id][i] == -1 && i == 1) { /* 25C offset Case of Sensor 2 split between 2 regs */ - tmp = (readl(bgp->fuse_base + 0x8) & 0xE0000000) >> (29); - tmp |= ((readl(bgp->fuse_base + 0xC) & 0x1F) << 3); + tmp = (readl(fuse_base + 0x8) & 0xE0000000) >> (29); + tmp |= ((readl(fuse_base + 0xC) & 0x1F) << 3); pow = tmp & 0x80; } else if (ct_offsets[id][i] == -1 && i == 2) { /* 125C Case of Sensor 3 split between 2 regs */ - tmp = (readl(bgp->fuse_base + 0x4) & 0xF8000000) >> (27); - tmp |= ((readl(bgp->fuse_base + 0x8) & 0xF) << 5); + tmp = (readl(fuse_base + 0x4) & 0xF8000000) >> (27); + tmp |= ((readl(fuse_base + 0x8) & 0xF) << 5); pow = tmp & 0x100; } else { - tmp = readl(bgp->fuse_base + ct_offsets[id][i]); + tmp = readl(fuse_base + ct_offsets[id][i]); tmp &= ct_bm[id][i]; tmp = tmp >> __ffs(ct_bm[id][i]); @@ -347,7 +339,7 @@ static void print_look_up_table(struct device *dev, int *ref_table) } struct k3_j72xx_bandgap_data { - unsigned int has_errata_i2128; + const bool has_errata_i2128; }; static int k3_j72xx_bandgap_probe(struct platform_device *pdev) @@ -358,11 +350,12 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; struct k3_j72xx_bandgap *bgp; struct k3_thermal_data *data; - int workaround_needed = 0; + bool workaround_needed = false; const struct k3_j72xx_bandgap_data *driver_data; struct thermal_zone_device *ti_thermal; int *ref_table; struct err_values err_vals; + void __iomem *fuse_base; const s64 golden_factors[] = { -490019999999999936, @@ -393,15 +386,32 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev) if (IS_ERR(bgp->cfg2_base)) return PTR_ERR(bgp->cfg2_base); - res = platform_get_resource(pdev, IORESOURCE_MEM, 2); - bgp->fuse_base = devm_ioremap_resource(dev, res); - if (IS_ERR(bgp->fuse_base)) - return PTR_ERR(bgp->fuse_base); - driver_data = of_device_get_match_data(dev); if (driver_data) workaround_needed = driver_data->has_errata_i2128; + /* + * Some of TI's J721E SoCs require a software trimming procedure + * for the temperature monitors to function properly. To determine + * if this particular SoC is NOT affected, both bits in the + * WKUP_SPARE_FUSE0[31:30] will be set (0xC0000000) indicating + * when software trimming should NOT be applied. + * + * https://www.ti.com/lit/er/sprz455c/sprz455c.pdf + */ + if (workaround_needed) { + res = platform_get_resource(pdev, IORESOURCE_MEM, 2); + fuse_base = devm_ioremap_resource(dev, res); + if (IS_ERR(fuse_base)) + return PTR_ERR(fuse_base); + + if ((readl(fuse_base) & 0xc0000000) == 0xc0000000) + workaround_needed = false; + } + + dev_dbg(bgp->dev, "Work around %sneeded\n", + workaround_needed ? "" : "not "); + pm_runtime_enable(dev); ret = pm_runtime_get_sync(dev); if (ret < 0) { @@ -434,13 +444,6 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev) goto err_free_ref_table; } - /* Workaround not needed if bit30/bit31 is set even for J721e */ - if (workaround_needed && (readl(bgp->fuse_base + 0x0) & 0xc0000000) == 0xc0000000) - workaround_needed = false; - - dev_dbg(bgp->dev, "Work around %sneeded\n", - workaround_needed ? "not " : ""); - if (!workaround_needed) init_table(5, ref_table, golden_factors); else @@ -459,7 +462,7 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev) err_vals.refs[1] = PLUS30CREF; err_vals.refs[2] = PLUS125CREF; err_vals.refs[3] = PLUS150CREF; - get_efuse_values(id, &data[id], err_vals.errs, bgp); + get_efuse_values(id, &data[id], err_vals.errs, fuse_base); } if (id == 0 && workaround_needed) @@ -529,11 +532,11 @@ static int k3_j72xx_bandgap_remove(struct platform_device *pdev) } static const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j721e_data = { - .has_errata_i2128 = 1, + .has_errata_i2128 = true, }; static const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j7200_data = { - .has_errata_i2128 = 0, + .has_errata_i2128 = false, }; static const struct of_device_id of_k3_j72xx_bandgap_match[] = { diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c index d3d9b9fa49e8191d7bcd8a4a506ea82f02db2685..4122a51e987414bf3c12bce3c83fc1b95c33b35d 100644 --- a/drivers/thermal/qcom/lmh.c +++ b/drivers/thermal/qcom/lmh.c @@ -45,7 +45,7 @@ static irqreturn_t lmh_handle_irq(int hw_irq, void *data) if (irq) generic_handle_irq(irq); - return 0; + return IRQ_HANDLED; } static void lmh_enable_interrupt(struct irq_data *d) diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c index 1b2c43eab27d12b00235e27c09b0f2745ff62107..ff47fc9ac9c5e872021f687bb567e8b767c9f448 100644 --- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c +++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c @@ -678,7 +678,7 @@ static int adc_tm5_register_tzd(struct adc_tm5_chip *adc_tm) &adc_tm5_thermal_ops); if (IS_ERR(tzd)) { if (PTR_ERR(tzd) == -ENODEV) { - dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n", + dev_dbg(adc_tm->dev, "thermal sensor on channel %d is not used\n", adc_tm->channels[i].channel); continue; } @@ -1030,10 +1030,8 @@ static int adc_tm5_probe(struct platform_device *pdev) return irq; ret = adc_tm5_get_dt_data(adc_tm, node); - if (ret) { - dev_err(dev, "get dt data failed: %d\n", ret); - return ret; - } + if (ret) + return dev_err_probe(dev, ret, "get dt data failed\n"); ret = adc_tm->data->init(adc_tm); if (ret) { diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c index be785ab37e53d5f877212980b166605e7afba62e..ad84978109e6f7a4e53751b286e1ca9af6ebf916 100644 --- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c +++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c @@ -252,7 +252,8 @@ static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip, disable_s2_shutdown = true; else dev_warn(chip->dev, - "No ADC is configured and critical temperature is above the maximum stage 2 threshold of 140 C! Configuring stage 2 shutdown at 140 C.\n"); + "No ADC is configured and critical temperature %d mC is above the maximum stage 2 threshold of %ld mC! Configuring stage 2 shutdown at %ld mC.\n", + temp, stage2_threshold_max, stage2_threshold_max); } skip: diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c index 67c1748cdf734c9d7c39a44c118037def6427800..4585904fb38002bb25c385be4cedc9e860745480 100644 --- a/drivers/thermal/qcom/tsens-8960.c +++ b/drivers/thermal/qcom/tsens-8960.c @@ -269,9 +269,12 @@ static const struct tsens_ops ops_8960 = { static struct tsens_features tsens_8960_feat = { .ver_major = VER_0, .crit_int = 0, + .combo_int = 0, .adc = 1, .srot_split = 0, .max_sensors = 11, + .trip_min_temp = -40000, + .trip_max_temp = 120000, }; struct tsens_plat_data data_8960 = { diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c index 327f37202c69fff1b853dda82d396c730107529a..04d012e4f7288221be16cbad8ba411633b9b41ff 100644 --- a/drivers/thermal/qcom/tsens-v0_1.c +++ b/drivers/thermal/qcom/tsens-v0_1.c @@ -539,9 +539,12 @@ static int calibrate_9607(struct tsens_priv *priv) static struct tsens_features tsens_v0_1_feat = { .ver_major = VER_0_1, .crit_int = 0, + .combo_int = 0, .adc = 1, .srot_split = 1, .max_sensors = 11, + .trip_min_temp = -40000, + .trip_max_temp = 120000, }; static const struct reg_field tsens_v0_1_regfields[MAX_REGFIELDS] = { diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c index 573e261ccca74e7adbfd233e02a27adad610ce65..1d7f8a80bd13a22446ca03e6e1df1cc3f8edc86f 100644 --- a/drivers/thermal/qcom/tsens-v1.c +++ b/drivers/thermal/qcom/tsens-v1.c @@ -302,9 +302,12 @@ static int calibrate_8976(struct tsens_priv *priv) static struct tsens_features tsens_v1_feat = { .ver_major = VER_1_X, .crit_int = 0, + .combo_int = 0, .adc = 1, .srot_split = 1, .max_sensors = 11, + .trip_min_temp = -40000, + .trip_max_temp = 120000, }; static const struct reg_field tsens_v1_regfields[MAX_REGFIELDS] = { diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c index b293ed32174b5530dad894ad427dbe40a525ac64..29a61d2d6ca312c715a086e848ae0978aaf7dbd2 100644 --- a/drivers/thermal/qcom/tsens-v2.c +++ b/drivers/thermal/qcom/tsens-v2.c @@ -31,9 +31,23 @@ static struct tsens_features tsens_v2_feat = { .ver_major = VER_2_X, .crit_int = 1, + .combo_int = 0, .adc = 0, .srot_split = 1, .max_sensors = 16, + .trip_min_temp = -40000, + .trip_max_temp = 120000, +}; + +static struct tsens_features ipq8074_feat = { + .ver_major = VER_2_X, + .crit_int = 1, + .combo_int = 1, + .adc = 0, + .srot_split = 1, + .max_sensors = 16, + .trip_min_temp = 0, + .trip_max_temp = 204000, }; static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = { @@ -101,6 +115,12 @@ struct tsens_plat_data data_tsens_v2 = { .fields = tsens_v2_regfields, }; +struct tsens_plat_data data_ipq8074 = { + .ops = &ops_generic_v2, + .feat = &ipq8074_feat, + .fields = tsens_v2_regfields, +}; + /* Kept around for backward compatibility with old msm8996.dtsi */ struct tsens_plat_data data_8996 = { .num_sensors = 13, diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c index b1b10005fb286e81833f0d52df9e58d7289c3b7d..b5b136ff323f9a38fe58f335c5433acb97868a13 100644 --- a/drivers/thermal/qcom/tsens.c +++ b/drivers/thermal/qcom/tsens.c @@ -532,6 +532,27 @@ static irqreturn_t tsens_irq_thread(int irq, void *data) return IRQ_HANDLED; } +/** + * tsens_combined_irq_thread() - Threaded interrupt handler for combined interrupts + * @irq: irq number + * @data: tsens controller private data + * + * Handle the combined interrupt as if it were 2 separate interrupts, so call the + * critical handler first and then the up/low one. + * + * Return: IRQ_HANDLED + */ +static irqreturn_t tsens_combined_irq_thread(int irq, void *data) +{ + irqreturn_t ret; + + ret = tsens_critical_irq_thread(irq, data); + if (ret != IRQ_HANDLED) + return ret; + + return tsens_irq_thread(irq, data); +} + static int tsens_set_trips(struct thermal_zone_device *tz, int low, int high) { struct tsens_sensor *s = tz->devdata; @@ -552,8 +573,8 @@ static int tsens_set_trips(struct thermal_zone_device *tz, int low, int high) dev_dbg(dev, "[%u] %s: proposed thresholds: (%d:%d)\n", hw_id, __func__, low, high); - cl_high = clamp_val(high, -40000, 120000); - cl_low = clamp_val(low, -40000, 120000); + cl_high = clamp_val(high, priv->feat->trip_min_temp, priv->feat->trip_max_temp); + cl_low = clamp_val(low, priv->feat->trip_min_temp, priv->feat->trip_max_temp); high_val = tsens_mC_to_hw(s, cl_high); low_val = tsens_mC_to_hw(s, cl_low); @@ -692,7 +713,7 @@ static int dbg_version_show(struct seq_file *s, void *data) return ret; seq_printf(s, "%d.%d.%d\n", maj_ver, min_ver, step_ver); } else { - seq_puts(s, "0.1.0\n"); + seq_printf(s, "0.%d.0\n", priv->feat->ver_major); } return 0; @@ -704,21 +725,14 @@ DEFINE_SHOW_ATTRIBUTE(dbg_sensors); static void tsens_debug_init(struct platform_device *pdev) { struct tsens_priv *priv = platform_get_drvdata(pdev); - struct dentry *root, *file; - root = debugfs_lookup("tsens", NULL); - if (!root) + priv->debug_root = debugfs_lookup("tsens", NULL); + if (!priv->debug_root) priv->debug_root = debugfs_create_dir("tsens", NULL); - else - priv->debug_root = root; - - file = debugfs_lookup("version", priv->debug_root); - if (!file) - debugfs_create_file("version", 0444, priv->debug_root, - pdev, &dbg_version_fops); /* A directory for each instance of the TSENS IP */ priv->debug = debugfs_create_dir(dev_name(&pdev->dev), priv->debug_root); + debugfs_create_file("version", 0444, priv->debug, pdev, &dbg_version_fops); debugfs_create_file("sensors", 0444, priv->debug, pdev, &dbg_sensors_fops); } #else @@ -918,8 +932,6 @@ int __init init_common(struct tsens_priv *priv) if (tsens_version(priv) >= VER_0_1) tsens_enable_irq(priv); - tsens_debug_init(op); - err_put_device: put_device(&op->dev); return ret; @@ -959,6 +971,9 @@ static const struct of_device_id tsens_table[] = { { .compatible = "qcom,ipq8064-tsens", .data = &data_8960, + }, { + .compatible = "qcom,ipq8074-tsens", + .data = &data_ipq8074, }, { .compatible = "qcom,mdm9607-tsens", .data = &data_9607, @@ -1071,13 +1086,18 @@ static int tsens_register(struct tsens_priv *priv) tsens_mC_to_hw(priv->sensor, 0)); } - ret = tsens_register_irq(priv, "uplow", tsens_irq_thread); - if (ret < 0) - return ret; + if (priv->feat->combo_int) { + ret = tsens_register_irq(priv, "combined", + tsens_combined_irq_thread); + } else { + ret = tsens_register_irq(priv, "uplow", tsens_irq_thread); + if (ret < 0) + return ret; - if (priv->feat->crit_int) - ret = tsens_register_irq(priv, "critical", - tsens_critical_irq_thread); + if (priv->feat->crit_int) + ret = tsens_register_irq(priv, "critical", + tsens_critical_irq_thread); + } return ret; } @@ -1153,7 +1173,11 @@ static int tsens_probe(struct platform_device *pdev) } } - return tsens_register(priv); + ret = tsens_register(priv); + if (!ret) + tsens_debug_init(pdev); + + return ret; } static int tsens_remove(struct platform_device *pdev) diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h index ba05c82333565bfc6d610931b297aee02cbea5e0..899af128855f74dd376d5685aefde76013b8246a 100644 --- a/drivers/thermal/qcom/tsens.h +++ b/drivers/thermal/qcom/tsens.h @@ -493,19 +493,25 @@ enum regfield_ids { * struct tsens_features - Features supported by the IP * @ver_major: Major number of IP version * @crit_int: does the IP support critical interrupts? + * @combo_int: does the IP use one IRQ for up, low and critical thresholds? * @adc: do the sensors only output adc code (instead of temperature)? * @srot_split: does the IP neatly splits the register space into SROT and TM, * with SROT only being available to secure boot firmware? * @has_watchdog: does this IP support watchdog functionality? * @max_sensors: maximum sensors supported by this version of the IP + * @trip_min_temp: minimum trip temperature supported by this version of the IP + * @trip_max_temp: maximum trip temperature supported by this version of the IP */ struct tsens_features { unsigned int ver_major; unsigned int crit_int:1; + unsigned int combo_int:1; unsigned int adc:1; unsigned int srot_split:1; unsigned int has_watchdog:1; unsigned int max_sensors; + int trip_min_temp; + int trip_max_temp; }; /** @@ -591,6 +597,6 @@ extern struct tsens_plat_data data_8916, data_8939, data_8974, data_9607; extern struct tsens_plat_data data_tsens_v1, data_8976; /* TSENS v2 targets */ -extern struct tsens_plat_data data_8996, data_tsens_v2; +extern struct tsens_plat_data data_8996, data_ipq8074, data_tsens_v2; #endif /* __QCOM_TSENS_H__ */ diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c index 78feb802a87d2b1a344d548091afd8da39e149fa..e7834ccc7976bd33b36834063b0e8a796d87fa08 100644 --- a/drivers/thermal/st/stm_thermal.c +++ b/drivers/thermal/st/stm_thermal.c @@ -488,7 +488,6 @@ MODULE_DEVICE_TABLE(of, stm_thermal_of_match); static int stm_thermal_probe(struct platform_device *pdev) { struct stm_thermal_sensor *sensor; - struct resource *res; void __iomem *base; int ret; @@ -506,8 +505,7 @@ static int stm_thermal_probe(struct platform_device *pdev) sensor->dev = &pdev->dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - base = devm_ioremap_resource(&pdev->dev, res); + base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(base)) return PTR_ERR(base); diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c index d4b6335ace15faa1954691b05138efe5f9b7557d..aacba30bc10c19e5cea2abc5a793c200a95c02f0 100644 --- a/drivers/thermal/thermal_of.c +++ b/drivers/thermal/thermal_of.c @@ -604,13 +604,15 @@ struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor, if (IS_ERR(np)) { if (PTR_ERR(np) != -ENODEV) pr_err("Failed to find thermal zone for %pOFn id=%d\n", sensor, id); - return ERR_CAST(np); + ret = PTR_ERR(np); + goto out_kfree_of_ops; } trips = thermal_of_trips_init(np, &ntrips); if (IS_ERR(trips)) { pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id); - return ERR_CAST(trips); + ret = PTR_ERR(trips); + goto out_kfree_of_ops; } ret = thermal_of_monitor_init(np, &delay, &pdelay); @@ -659,6 +661,8 @@ out_kfree_tzp: kfree(tzp); out_kfree_trips: kfree(trips); +out_kfree_of_ops: + kfree(of_ops); return ERR_PTR(ret); } diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 67050a1a5b07f8f165e8a53b077b236c96985e78..a1c9a153018320788f4b39855e1f038aa23e46ae 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -878,7 +878,7 @@ static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev) */ static const struct soc_device_attribute soc_no_cpu_notifier[] = { { .machine = "OMAP4430" }, - { /* sentinel */ }, + { /* sentinel */ } }; /*** Device driver call backs ***/ diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c index 87067c3d6109b966fb7a35be4e0ed6da9f15c9af..6fb5140e29b9dd641be37ddd42034e4f6466a12f 100644 --- a/drivers/usb/misc/chaoskey.c +++ b/drivers/usb/misc/chaoskey.c @@ -200,7 +200,6 @@ static int chaoskey_probe(struct usb_interface *interface, dev->hwrng.name = dev->name ? dev->name : chaoskey_driver.name; dev->hwrng.read = chaoskey_rng_read; - dev->hwrng.quality = 1024; dev->hwrng_registered = (hwrng_register(&dev->hwrng) == 0); if (!dev->hwrng_registered) diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 86c381ceb9a1e95e95643dcce8a20fc74c5b1559..a8f54462946742661820ed9dd42560a54aa0ceb4 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -2,8 +2,9 @@ menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" select IOMMU_API - select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + depends on IOMMUFD || !IOMMUFD select INTERVAL_TREE + select VFIO_CONTAINER if IOMMUFD=n help VFIO provides a framework for secure userspace device drivers. See Documentation/driver-api/vfio.rst for more details. @@ -11,6 +12,18 @@ menuconfig VFIO If you don't know what to do here, say N. if VFIO +config VFIO_CONTAINER + bool "Support for the VFIO container /dev/vfio/vfio" + select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64) + default y + help + The VFIO container is the classic interface to VFIO for establishing + IOMMU mappings. If N is selected here then IOMMUFD must be used to + manage the mappings. + + Unless testing IOMMUFD say Y here. + +if VFIO_CONTAINER config VFIO_IOMMU_TYPE1 tristate default n @@ -20,16 +33,6 @@ config VFIO_IOMMU_SPAPR_TCE depends on SPAPR_TCE_IOMMU default VFIO -config VFIO_SPAPR_EEH - tristate - depends on EEH && VFIO_IOMMU_SPAPR_TCE - default VFIO - -config VFIO_VIRQFD - tristate - select EVENTFD - default n - config VFIO_NOIOMMU bool "VFIO No-IOMMU support" help @@ -43,6 +46,12 @@ config VFIO_NOIOMMU this mode since there is no IOMMU to provide DMA translation. If you don't know what to do here, say N. +endif + +config VFIO_VIRQFD + bool + select EVENTFD + default n source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index b693a1169286f8771f7d019e600268e45d208ba5..70e7dcb302efd20ddc2d4cb31fbffc1ea5e6b49f 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,16 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 -vfio_virqfd-y := virqfd.o - obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ - iova_bitmap.o \ - container.o + group.o \ + iova_bitmap.o +vfio-$(CONFIG_IOMMUFD) += iommufd.o +vfio-$(CONFIG_VFIO_CONTAINER) += container.o +vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o -obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o -obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ obj-$(CONFIG_VFIO_PLATFORM) += platform/ obj-$(CONFIG_VFIO_MDEV) += mdev/ diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c index d74164abbf401d42cbcf42b6f5f7282dd7769de7..b7a9560ab25e486ae88c4dfe9feab750972a22fb 100644 --- a/drivers/vfio/container.c +++ b/drivers/vfio/container.c @@ -188,8 +188,9 @@ void vfio_device_container_unregister(struct vfio_device *device) device->group->container->iommu_data, device); } -long vfio_container_ioctl_check_extension(struct vfio_container *container, - unsigned long arg) +static long +vfio_container_ioctl_check_extension(struct vfio_container *container, + unsigned long arg) { struct vfio_iommu_driver *driver; long ret = 0; @@ -511,14 +512,15 @@ void vfio_group_detach_container(struct vfio_group *group) vfio_container_put(container); } -int vfio_device_assign_container(struct vfio_device *device) +int vfio_group_use_container(struct vfio_group *group) { - struct vfio_group *group = device->group; - lockdep_assert_held(&group->group_lock); - if (!group->container || !group->container->iommu_driver || - WARN_ON(!group->container_users)) + /* + * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but + * VFIO_SET_IOMMU hasn't been done yet. + */ + if (!group->container->iommu_driver) return -EINVAL; if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) @@ -529,122 +531,56 @@ int vfio_device_assign_container(struct vfio_device *device) return 0; } -void vfio_device_unassign_container(struct vfio_device *device) +void vfio_group_unuse_container(struct vfio_group *group) { - mutex_lock(&device->group->group_lock); - WARN_ON(device->group->container_users <= 1); - device->group->container_users--; - fput(device->group->opened_file); - mutex_unlock(&device->group->group_lock); + lockdep_assert_held(&group->group_lock); + + WARN_ON(group->container_users <= 1); + group->container_users--; + fput(group->opened_file); } -/* - * Pin contiguous user pages and return their associated host pages for local - * domain only. - * @device [in] : device - * @iova [in] : starting IOVA of user pages to be pinned. - * @npage [in] : count of pages to be pinned. This count should not - * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. - * @prot [in] : protection flags - * @pages[out] : array of host pages - * Return error or number of pages pinned. - * - * A driver may only call this function if the vfio_device was created - * by vfio_register_emulated_iommu_dev(). - */ -int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, - int npage, int prot, struct page **pages) +int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + int prot, struct page **pages) { - struct vfio_container *container; - struct vfio_group *group = device->group; - struct vfio_iommu_driver *driver; - int ret; - - if (!pages || !npage || !vfio_assert_device_open(device)) - return -EINVAL; + struct vfio_container *container = device->group->container; + struct iommu_group *iommu_group = device->group->iommu_group; + struct vfio_iommu_driver *driver = container->iommu_driver; if (npage > VFIO_PIN_PAGES_MAX_ENTRIES) return -E2BIG; - /* group->container cannot change while a vfio device is open */ - container = group->container; - driver = container->iommu_driver; - if (likely(driver && driver->ops->pin_pages)) - ret = driver->ops->pin_pages(container->iommu_data, - group->iommu_group, iova, - npage, prot, pages); - else - ret = -ENOTTY; - - return ret; + if (unlikely(!driver || !driver->ops->pin_pages)) + return -ENOTTY; + return driver->ops->pin_pages(container->iommu_data, iommu_group, iova, + npage, prot, pages); } -EXPORT_SYMBOL(vfio_pin_pages); -/* - * Unpin contiguous host pages for local domain only. - * @device [in] : device - * @iova [in] : starting address of user pages to be unpinned. - * @npage [in] : count of pages to be unpinned. This count should not - * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. - */ -void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) +void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage) { - struct vfio_container *container; - struct vfio_iommu_driver *driver; + struct vfio_container *container = device->group->container; if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES)) return; - if (WARN_ON(!vfio_assert_device_open(device))) - return; - - /* group->container cannot change while a vfio device is open */ - container = device->group->container; - driver = container->iommu_driver; - - driver->ops->unpin_pages(container->iommu_data, iova, npage); + container->iommu_driver->ops->unpin_pages(container->iommu_data, iova, + npage); } -EXPORT_SYMBOL(vfio_unpin_pages); -/* - * This interface allows the CPUs to perform some sort of virtual DMA on - * behalf of the device. - * - * CPUs read/write from/into a range of IOVAs pointing to user space memory - * into/from a kernel buffer. - * - * As the read/write of user space memory is conducted via the CPUs and is - * not a real device DMA, it is not necessary to pin the user space memory. - * - * @device [in] : VFIO device - * @iova [in] : base IOVA of a user space buffer - * @data [in] : pointer to kernel buffer - * @len [in] : kernel buffer length - * @write : indicate read or write - * Return error code on failure or 0 on success. - */ -int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, - size_t len, bool write) +int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write) { - struct vfio_container *container; - struct vfio_iommu_driver *driver; - int ret = 0; - - if (!data || len <= 0 || !vfio_assert_device_open(device)) - return -EINVAL; - - /* group->container cannot change while a vfio device is open */ - container = device->group->container; - driver = container->iommu_driver; + struct vfio_container *container = device->group->container; + struct vfio_iommu_driver *driver = container->iommu_driver; - if (likely(driver && driver->ops->dma_rw)) - ret = driver->ops->dma_rw(container->iommu_data, - iova, data, len, write); - else - ret = -ENOTTY; - return ret; + if (unlikely(!driver || !driver->ops->dma_rw)) + return -ENOTTY; + return driver->ops->dma_rw(container->iommu_data, iova, data, len, + write); } -EXPORT_SYMBOL(vfio_dma_rw); int __init vfio_container_init(void) { @@ -678,3 +614,6 @@ void vfio_container_cleanup(void) misc_deregister(&vfio_dev); mutex_destroy(&vfio.iommu_drivers_lock); } + +MODULE_ALIAS_MISCDEV(VFIO_MINOR); +MODULE_ALIAS("devname:vfio/vfio"); diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c index b16874e913e4f527457f5864be5b942d60729a16..defeb8510ace52dd64921193cef5ef6aeca49866 100644 --- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c +++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c @@ -568,7 +568,6 @@ static void vfio_fsl_mc_release_dev(struct vfio_device *core_vdev) vfio_fsl_uninit_device(vdev); mutex_destroy(&vdev->igate); - vfio_free_device(core_vdev); } static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev) @@ -592,6 +591,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = { .read = vfio_fsl_mc_read, .write = vfio_fsl_mc_write, .mmap = vfio_fsl_mc_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static struct fsl_mc_driver vfio_fsl_mc_driver = { diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c new file mode 100644 index 0000000000000000000000000000000000000000..c5d8bf10495eccefc6353910b962b1fe39350038 --- /dev/null +++ b/drivers/vfio/group.c @@ -0,0 +1,877 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VFIO core + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + * + * Derived from original vfio: + * Copyright 2010 Cisco Systems, Inc. All rights reserved. + * Author: Tom Lyon, pugs@cisco.com + */ + +#include +#include +#include +#include "vfio.h" + +static struct vfio { + struct class *class; + struct list_head group_list; + struct mutex group_lock; /* locks group_list */ + struct ida group_ida; + dev_t group_devt; +} vfio; + +static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, + char *buf) +{ + struct vfio_device *it, *device = ERR_PTR(-ENODEV); + + mutex_lock(&group->device_lock); + list_for_each_entry(it, &group->device_list, group_next) { + int ret; + + if (it->ops->match) { + ret = it->ops->match(it, buf); + if (ret < 0) { + device = ERR_PTR(ret); + break; + } + } else { + ret = !strcmp(dev_name(it->dev), buf); + } + + if (ret && vfio_device_try_get_registration(it)) { + device = it; + break; + } + } + mutex_unlock(&group->device_lock); + + return device; +} + +/* + * VFIO Group fd, /dev/vfio/$GROUP + */ +static bool vfio_group_has_iommu(struct vfio_group *group) +{ + lockdep_assert_held(&group->group_lock); + /* + * There can only be users if there is a container, and if there is a + * container there must be users. + */ + WARN_ON(!group->container != !group->container_users); + + return group->container || group->iommufd; +} + +/* + * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or + * if there was no container to unset. Since the ioctl is called on + * the group, we know that still exists, therefore the only valid + * transition here is 1->0. + */ +static int vfio_group_ioctl_unset_container(struct vfio_group *group) +{ + int ret = 0; + + mutex_lock(&group->group_lock); + if (!vfio_group_has_iommu(group)) { + ret = -EINVAL; + goto out_unlock; + } + if (group->container) { + if (group->container_users != 1) { + ret = -EBUSY; + goto out_unlock; + } + vfio_group_detach_container(group); + } + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; + } + +out_unlock: + mutex_unlock(&group->group_lock); + return ret; +} + +static int vfio_group_ioctl_set_container(struct vfio_group *group, + int __user *arg) +{ + struct vfio_container *container; + struct iommufd_ctx *iommufd; + struct fd f; + int ret; + int fd; + + if (get_user(fd, arg)) + return -EFAULT; + + f = fdget(fd); + if (!f.file) + return -EBADF; + + mutex_lock(&group->group_lock); + if (vfio_group_has_iommu(group)) { + ret = -EINVAL; + goto out_unlock; + } + if (!group->iommu_group) { + ret = -ENODEV; + goto out_unlock; + } + + container = vfio_container_from_file(f.file); + if (container) { + ret = vfio_container_attach_group(container, group); + goto out_unlock; + } + + iommufd = iommufd_ctx_from_file(f.file); + if (!IS_ERR(iommufd)) { + u32 ioas_id; + + ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id); + if (ret) { + iommufd_ctx_put(group->iommufd); + goto out_unlock; + } + + group->iommufd = iommufd; + goto out_unlock; + } + + /* The FD passed is not recognized. */ + ret = -EBADFD; + +out_unlock: + mutex_unlock(&group->group_lock); + fdput(f); + return ret; +} + +static int vfio_device_group_open(struct vfio_device *device) +{ + int ret; + + mutex_lock(&device->group->group_lock); + if (!vfio_group_has_iommu(device->group)) { + ret = -EINVAL; + goto out_unlock; + } + + /* + * Here we pass the KVM pointer with the group under the lock. If the + * device driver will use it, it must obtain a reference and release it + * during close_device. + */ + ret = vfio_device_open(device, device->group->iommufd, + device->group->kvm); + +out_unlock: + mutex_unlock(&device->group->group_lock); + return ret; +} + +void vfio_device_group_close(struct vfio_device *device) +{ + mutex_lock(&device->group->group_lock); + vfio_device_close(device, device->group->iommufd); + mutex_unlock(&device->group->group_lock); +} + +static struct file *vfio_device_open_file(struct vfio_device *device) +{ + struct file *filep; + int ret; + + ret = vfio_device_group_open(device); + if (ret) + goto err_out; + + /* + * We can't use anon_inode_getfd() because we need to modify + * the f_mode flags directly to allow more than just ioctls + */ + filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, + device, O_RDWR); + if (IS_ERR(filep)) { + ret = PTR_ERR(filep); + goto err_close_device; + } + + /* + * TODO: add an anon_inode interface to do this. + * Appears to be missing by lack of need rather than + * explicitly prevented. Now there's need. + */ + filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); + + if (device->group->type == VFIO_NO_IOMMU) + dev_warn(device->dev, "vfio-noiommu device opened by user " + "(%s:%d)\n", current->comm, task_pid_nr(current)); + /* + * On success the ref of device is moved to the file and + * put in vfio_device_fops_release() + */ + return filep; + +err_close_device: + vfio_device_group_close(device); +err_out: + return ERR_PTR(ret); +} + +static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, + char __user *arg) +{ + struct vfio_device *device; + struct file *filep; + char *buf; + int fdno; + int ret; + + buf = strndup_user(arg, PAGE_SIZE); + if (IS_ERR(buf)) + return PTR_ERR(buf); + + device = vfio_device_get_from_name(group, buf); + kfree(buf); + if (IS_ERR(device)) + return PTR_ERR(device); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + ret = fdno; + goto err_put_device; + } + + filep = vfio_device_open_file(device); + if (IS_ERR(filep)) { + ret = PTR_ERR(filep); + goto err_put_fdno; + } + + fd_install(fdno, filep); + return fdno; + +err_put_fdno: + put_unused_fd(fdno); +err_put_device: + vfio_device_put_registration(device); + return ret; +} + +static int vfio_group_ioctl_get_status(struct vfio_group *group, + struct vfio_group_status __user *arg) +{ + unsigned long minsz = offsetofend(struct vfio_group_status, flags); + struct vfio_group_status status; + + if (copy_from_user(&status, arg, minsz)) + return -EFAULT; + + if (status.argsz < minsz) + return -EINVAL; + + status.flags = 0; + + mutex_lock(&group->group_lock); + if (!group->iommu_group) { + mutex_unlock(&group->group_lock); + return -ENODEV; + } + + /* + * With the container FD the iommu_group_claim_dma_owner() is done + * during SET_CONTAINER but for IOMMFD this is done during + * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd + * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due + * to viability. + */ + if (vfio_group_has_iommu(group)) + status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | + VFIO_GROUP_FLAGS_VIABLE; + else if (!iommu_group_dma_owner_claimed(group->iommu_group)) + status.flags |= VFIO_GROUP_FLAGS_VIABLE; + mutex_unlock(&group->group_lock); + + if (copy_to_user(arg, &status, minsz)) + return -EFAULT; + return 0; +} + +static long vfio_group_fops_unl_ioctl(struct file *filep, + unsigned int cmd, unsigned long arg) +{ + struct vfio_group *group = filep->private_data; + void __user *uarg = (void __user *)arg; + + switch (cmd) { + case VFIO_GROUP_GET_DEVICE_FD: + return vfio_group_ioctl_get_device_fd(group, uarg); + case VFIO_GROUP_GET_STATUS: + return vfio_group_ioctl_get_status(group, uarg); + case VFIO_GROUP_SET_CONTAINER: + return vfio_group_ioctl_set_container(group, uarg); + case VFIO_GROUP_UNSET_CONTAINER: + return vfio_group_ioctl_unset_container(group); + default: + return -ENOTTY; + } +} + +static int vfio_group_fops_open(struct inode *inode, struct file *filep) +{ + struct vfio_group *group = + container_of(inode->i_cdev, struct vfio_group, cdev); + int ret; + + mutex_lock(&group->group_lock); + + /* + * drivers can be zero if this races with vfio_device_remove_group(), it + * will be stable at 0 under the group rwsem + */ + if (refcount_read(&group->drivers) == 0) { + ret = -ENODEV; + goto out_unlock; + } + + if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { + ret = -EPERM; + goto out_unlock; + } + + /* + * Do we need multiple instances of the group open? Seems not. + */ + if (group->opened_file) { + ret = -EBUSY; + goto out_unlock; + } + group->opened_file = filep; + filep->private_data = group; + ret = 0; +out_unlock: + mutex_unlock(&group->group_lock); + return ret; +} + +static int vfio_group_fops_release(struct inode *inode, struct file *filep) +{ + struct vfio_group *group = filep->private_data; + + filep->private_data = NULL; + + mutex_lock(&group->group_lock); + /* + * Device FDs hold a group file reference, therefore the group release + * is only called when there are no open devices. + */ + WARN_ON(group->notifier.head); + if (group->container) + vfio_group_detach_container(group); + if (group->iommufd) { + iommufd_ctx_put(group->iommufd); + group->iommufd = NULL; + } + group->opened_file = NULL; + mutex_unlock(&group->group_lock); + return 0; +} + +static const struct file_operations vfio_group_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = vfio_group_fops_unl_ioctl, + .compat_ioctl = compat_ptr_ioctl, + .open = vfio_group_fops_open, + .release = vfio_group_fops_release, +}; + +/* + * Group objects - create, release, get, put, search + */ +static struct vfio_group * +vfio_group_find_from_iommu(struct iommu_group *iommu_group) +{ + struct vfio_group *group; + + lockdep_assert_held(&vfio.group_lock); + + /* + * group->iommu_group from the vfio.group_list cannot be NULL + * under the vfio.group_lock. + */ + list_for_each_entry(group, &vfio.group_list, vfio_next) { + if (group->iommu_group == iommu_group) + return group; + } + return NULL; +} + +static void vfio_group_release(struct device *dev) +{ + struct vfio_group *group = container_of(dev, struct vfio_group, dev); + + mutex_destroy(&group->device_lock); + mutex_destroy(&group->group_lock); + WARN_ON(group->iommu_group); + ida_free(&vfio.group_ida, MINOR(group->dev.devt)); + kfree(group); +} + +static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, + enum vfio_group_type type) +{ + struct vfio_group *group; + int minor; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); + if (minor < 0) { + kfree(group); + return ERR_PTR(minor); + } + + device_initialize(&group->dev); + group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); + group->dev.class = vfio.class; + group->dev.release = vfio_group_release; + cdev_init(&group->cdev, &vfio_group_fops); + group->cdev.owner = THIS_MODULE; + + refcount_set(&group->drivers, 1); + mutex_init(&group->group_lock); + INIT_LIST_HEAD(&group->device_list); + mutex_init(&group->device_lock); + group->iommu_group = iommu_group; + /* put in vfio_group_release() */ + iommu_group_ref_get(iommu_group); + group->type = type; + BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); + + return group; +} + +static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, + enum vfio_group_type type) +{ + struct vfio_group *group; + struct vfio_group *ret; + int err; + + lockdep_assert_held(&vfio.group_lock); + + group = vfio_group_alloc(iommu_group, type); + if (IS_ERR(group)) + return group; + + err = dev_set_name(&group->dev, "%s%d", + group->type == VFIO_NO_IOMMU ? "noiommu-" : "", + iommu_group_id(iommu_group)); + if (err) { + ret = ERR_PTR(err); + goto err_put; + } + + err = cdev_device_add(&group->cdev, &group->dev); + if (err) { + ret = ERR_PTR(err); + goto err_put; + } + + list_add(&group->vfio_next, &vfio.group_list); + + return group; + +err_put: + put_device(&group->dev); + return ret; +} + +static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, + enum vfio_group_type type) +{ + struct iommu_group *iommu_group; + struct vfio_group *group; + int ret; + + iommu_group = iommu_group_alloc(); + if (IS_ERR(iommu_group)) + return ERR_CAST(iommu_group); + + ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); + if (ret) + goto out_put_group; + ret = iommu_group_add_device(iommu_group, dev); + if (ret) + goto out_put_group; + + mutex_lock(&vfio.group_lock); + group = vfio_create_group(iommu_group, type); + mutex_unlock(&vfio.group_lock); + if (IS_ERR(group)) { + ret = PTR_ERR(group); + goto out_remove_device; + } + iommu_group_put(iommu_group); + return group; + +out_remove_device: + iommu_group_remove_device(dev); +out_put_group: + iommu_group_put(iommu_group); + return ERR_PTR(ret); +} + +static bool vfio_group_has_device(struct vfio_group *group, struct device *dev) +{ + struct vfio_device *device; + + mutex_lock(&group->device_lock); + list_for_each_entry(device, &group->device_list, group_next) { + if (device->dev == dev) { + mutex_unlock(&group->device_lock); + return true; + } + } + mutex_unlock(&group->device_lock); + return false; +} + +static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) +{ + struct iommu_group *iommu_group; + struct vfio_group *group; + + iommu_group = iommu_group_get(dev); + if (!iommu_group && vfio_noiommu) { + /* + * With noiommu enabled, create an IOMMU group for devices that + * don't already have one, implying no IOMMU hardware/driver + * exists. Taint the kernel because we're about to give a DMA + * capable device to a user without IOMMU protection. + */ + group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); + if (!IS_ERR(group)) { + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); + } + return group; + } + + if (!iommu_group) + return ERR_PTR(-EINVAL); + + /* + * VFIO always sets IOMMU_CACHE because we offer no way for userspace to + * restore cache coherency. It has to be checked here because it is only + * valid for cases where we are using iommu groups. + */ + if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { + iommu_group_put(iommu_group); + return ERR_PTR(-EINVAL); + } + + mutex_lock(&vfio.group_lock); + group = vfio_group_find_from_iommu(iommu_group); + if (group) { + if (WARN_ON(vfio_group_has_device(group, dev))) + group = ERR_PTR(-EINVAL); + else + refcount_inc(&group->drivers); + } else { + group = vfio_create_group(iommu_group, VFIO_IOMMU); + } + mutex_unlock(&vfio.group_lock); + + /* The vfio_group holds a reference to the iommu_group */ + iommu_group_put(iommu_group); + return group; +} + +int vfio_device_set_group(struct vfio_device *device, + enum vfio_group_type type) +{ + struct vfio_group *group; + + if (type == VFIO_IOMMU) + group = vfio_group_find_or_alloc(device->dev); + else + group = vfio_noiommu_group_alloc(device->dev, type); + + if (IS_ERR(group)) + return PTR_ERR(group); + + /* Our reference on group is moved to the device */ + device->group = group; + return 0; +} + +void vfio_device_remove_group(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + struct iommu_group *iommu_group; + + if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) + iommu_group_remove_device(device->dev); + + /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */ + if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock)) + return; + list_del(&group->vfio_next); + + /* + * We could concurrently probe another driver in the group that might + * race vfio_device_remove_group() with vfio_get_group(), so we have to + * ensure that the sysfs is all cleaned up under lock otherwise the + * cdev_device_add() will fail due to the name aready existing. + */ + cdev_device_del(&group->cdev, &group->dev); + + mutex_lock(&group->group_lock); + /* + * These data structures all have paired operations that can only be + * undone when the caller holds a live reference on the device. Since + * all pairs must be undone these WARN_ON's indicate some caller did not + * properly hold the group reference. + */ + WARN_ON(!list_empty(&group->device_list)); + WARN_ON(group->notifier.head); + + /* + * Revoke all users of group->iommu_group. At this point we know there + * are no devices active because we are unplugging the last one. Setting + * iommu_group to NULL blocks all new users. + */ + if (group->container) + vfio_group_detach_container(group); + iommu_group = group->iommu_group; + group->iommu_group = NULL; + mutex_unlock(&group->group_lock); + mutex_unlock(&vfio.group_lock); + + iommu_group_put(iommu_group); + put_device(&group->dev); +} + +void vfio_device_group_register(struct vfio_device *device) +{ + mutex_lock(&device->group->device_lock); + list_add(&device->group_next, &device->group->device_list); + mutex_unlock(&device->group->device_lock); +} + +void vfio_device_group_unregister(struct vfio_device *device) +{ + mutex_lock(&device->group->device_lock); + list_del(&device->group_next); + mutex_unlock(&device->group->device_lock); +} + +int vfio_device_group_use_iommu(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + int ret = 0; + + lockdep_assert_held(&group->group_lock); + + if (WARN_ON(!group->container)) + return -EINVAL; + + ret = vfio_group_use_container(group); + if (ret) + return ret; + vfio_device_container_register(device); + return 0; +} + +void vfio_device_group_unuse_iommu(struct vfio_device *device) +{ + struct vfio_group *group = device->group; + + lockdep_assert_held(&group->group_lock); + + if (WARN_ON(!group->container)) + return; + + vfio_device_container_unregister(device); + vfio_group_unuse_container(group); +} + +bool vfio_device_has_container(struct vfio_device *device) +{ + return device->group->container; +} + +/** + * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file + * @file: VFIO group file + * + * The returned iommu_group is valid as long as a ref is held on the file. This + * returns a reference on the group. This function is deprecated, only the SPAPR + * path in kvm should call it. + */ +struct iommu_group *vfio_file_iommu_group(struct file *file) +{ + struct vfio_group *group = file->private_data; + struct iommu_group *iommu_group = NULL; + + if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) + return NULL; + + if (!vfio_file_is_group(file)) + return NULL; + + mutex_lock(&group->group_lock); + if (group->iommu_group) { + iommu_group = group->iommu_group; + iommu_group_ref_get(iommu_group); + } + mutex_unlock(&group->group_lock); + return iommu_group; +} +EXPORT_SYMBOL_GPL(vfio_file_iommu_group); + +/** + * vfio_file_is_group - True if the file is usable with VFIO aPIS + * @file: VFIO group file + */ +bool vfio_file_is_group(struct file *file) +{ + return file->f_op == &vfio_group_fops; +} +EXPORT_SYMBOL_GPL(vfio_file_is_group); + +/** + * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file + * is always CPU cache coherent + * @file: VFIO group file + * + * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop + * bit in DMA transactions. A return of false indicates that the user has + * rights to access additional instructions such as wbinvd on x86. + */ +bool vfio_file_enforced_coherent(struct file *file) +{ + struct vfio_group *group = file->private_data; + struct vfio_device *device; + bool ret = true; + + if (!vfio_file_is_group(file)) + return true; + + /* + * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then + * any domain later attached to it will also not support it. If the cap + * is set then the iommu_domain eventually attached to the device/group + * must use a domain with enforce_cache_coherency(). + */ + mutex_lock(&group->device_lock); + list_for_each_entry(device, &group->device_list, group_next) { + if (!device_iommu_capable(device->dev, + IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) { + ret = false; + break; + } + } + mutex_unlock(&group->device_lock); + return ret; +} +EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); + +/** + * vfio_file_set_kvm - Link a kvm with VFIO drivers + * @file: VFIO group file + * @kvm: KVM to link + * + * When a VFIO device is first opened the KVM will be available in + * device->kvm if one was associated with the group. + */ +void vfio_file_set_kvm(struct file *file, struct kvm *kvm) +{ + struct vfio_group *group = file->private_data; + + if (!vfio_file_is_group(file)) + return; + + mutex_lock(&group->group_lock); + group->kvm = kvm; + mutex_unlock(&group->group_lock); +} +EXPORT_SYMBOL_GPL(vfio_file_set_kvm); + +/** + * vfio_file_has_dev - True if the VFIO file is a handle for device + * @file: VFIO file to check + * @device: Device that must be part of the file + * + * Returns true if given file has permission to manipulate the given device. + */ +bool vfio_file_has_dev(struct file *file, struct vfio_device *device) +{ + struct vfio_group *group = file->private_data; + + if (!vfio_file_is_group(file)) + return false; + + return group == device->group; +} +EXPORT_SYMBOL_GPL(vfio_file_has_dev); + +static char *vfio_devnode(struct device *dev, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); +} + +int __init vfio_group_init(void) +{ + int ret; + + ida_init(&vfio.group_ida); + mutex_init(&vfio.group_lock); + INIT_LIST_HEAD(&vfio.group_list); + + ret = vfio_container_init(); + if (ret) + return ret; + + /* /dev/vfio/$GROUP */ + vfio.class = class_create(THIS_MODULE, "vfio"); + if (IS_ERR(vfio.class)) { + ret = PTR_ERR(vfio.class); + goto err_group_class; + } + + vfio.class->devnode = vfio_devnode; + + ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); + if (ret) + goto err_alloc_chrdev; + return 0; + +err_alloc_chrdev: + class_destroy(vfio.class); + vfio.class = NULL; +err_group_class: + vfio_container_cleanup(); + return ret; +} + +void vfio_group_cleanup(void) +{ + WARN_ON(!list_empty(&vfio.group_list)); + ida_destroy(&vfio.group_ida); + unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); + class_destroy(vfio.class); + vfio.class = NULL; + vfio_container_cleanup(); +} diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c new file mode 100644 index 0000000000000000000000000000000000000000..4f82a6fa7c6c7fcc25da29c4a497ee673f34d618 --- /dev/null +++ b/drivers/vfio/iommufd.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES + */ +#include +#include + +#include "vfio.h" + +MODULE_IMPORT_NS(IOMMUFD); +MODULE_IMPORT_NS(IOMMUFD_VFIO); + +int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx) +{ + u32 ioas_id; + u32 device_id; + int ret; + + lockdep_assert_held(&vdev->dev_set->lock); + + /* + * If the driver doesn't provide this op then it means the device does + * not do DMA at all. So nothing to do. + */ + if (!vdev->ops->bind_iommufd) + return 0; + + ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id); + if (ret) + return ret; + + ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id); + if (ret) + goto err_unbind; + ret = vdev->ops->attach_ioas(vdev, &ioas_id); + if (ret) + goto err_unbind; + + /* + * The legacy path has no way to return the device id or the selected + * pt_id + */ + return 0; + +err_unbind: + if (vdev->ops->unbind_iommufd) + vdev->ops->unbind_iommufd(vdev); + return ret; +} + +void vfio_iommufd_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->ops->unbind_iommufd) + vdev->ops->unbind_iommufd(vdev); +} + +/* + * The physical standard ops mean that the iommufd_device is bound to the + * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers + * using this ops set should call vfio_register_group_dev() + */ +int vfio_iommufd_physical_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id) +{ + struct iommufd_device *idev; + + idev = iommufd_device_bind(ictx, vdev->dev, out_device_id); + if (IS_ERR(idev)) + return PTR_ERR(idev); + vdev->iommufd_device = idev; + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind); + +void vfio_iommufd_physical_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->iommufd_attached) { + iommufd_device_detach(vdev->iommufd_device); + vdev->iommufd_attached = false; + } + iommufd_device_unbind(vdev->iommufd_device); + vdev->iommufd_device = NULL; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind); + +int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id) +{ + int rc; + + rc = iommufd_device_attach(vdev->iommufd_device, pt_id); + if (rc) + return rc; + vdev->iommufd_attached = true; + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas); + +/* + * The emulated standard ops mean that vfio_device is going to use the + * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this + * ops set should call vfio_register_emulated_iommu_dev(). + */ + +static void vfio_emulated_unmap(void *data, unsigned long iova, + unsigned long length) +{ + struct vfio_device *vdev = data; + + vdev->ops->dma_unmap(vdev, iova, length); +} + +static const struct iommufd_access_ops vfio_user_ops = { + .needs_pin_pages = 1, + .unmap = vfio_emulated_unmap, +}; + +int vfio_iommufd_emulated_bind(struct vfio_device *vdev, + struct iommufd_ctx *ictx, u32 *out_device_id) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + vdev->iommufd_ictx = ictx; + iommufd_ctx_get(ictx); + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind); + +void vfio_iommufd_emulated_unbind(struct vfio_device *vdev) +{ + lockdep_assert_held(&vdev->dev_set->lock); + + if (vdev->iommufd_access) { + iommufd_access_destroy(vdev->iommufd_access); + vdev->iommufd_access = NULL; + } + iommufd_ctx_put(vdev->iommufd_ictx); + vdev->iommufd_ictx = NULL; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind); + +int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id) +{ + struct iommufd_access *user; + + lockdep_assert_held(&vdev->dev_set->lock); + + user = iommufd_access_create(vdev->iommufd_ictx, *pt_id, &vfio_user_ops, + vdev); + if (IS_ERR(user)) + return PTR_ERR(user); + vdev->iommufd_access = user; + return 0; +} +EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas); diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c index 6631e8befe1b2711d58fad544eac41dc829bd8ad..0848f920efb7c1c13521cc9d59ae96231cbf4481 100644 --- a/drivers/vfio/iova_bitmap.c +++ b/drivers/vfio/iova_bitmap.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE) @@ -295,11 +296,13 @@ void iova_bitmap_free(struct iova_bitmap *bitmap) */ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap) { - unsigned long remaining; + unsigned long remaining, bytes; + + bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff; remaining = bitmap->mapped_total_index - bitmap->mapped_base_index; remaining = min_t(unsigned long, remaining, - (bitmap->mapped.npages << PAGE_SHIFT) / sizeof(*bitmap->bitmap)); + bytes / sizeof(*bitmap->bitmap)); return remaining; } @@ -394,29 +397,27 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque, * Set the bits corresponding to the range [iova .. iova+length-1] in * the user bitmap. * - * Return: The number of bits set. */ void iova_bitmap_set(struct iova_bitmap *bitmap, unsigned long iova, size_t length) { struct iova_bitmap_map *mapped = &bitmap->mapped; - unsigned long offset = (iova - mapped->iova) >> mapped->pgshift; - unsigned long nbits = max_t(unsigned long, 1, length >> mapped->pgshift); - unsigned long page_idx = offset / BITS_PER_PAGE; - unsigned long page_offset = mapped->pgoff; - void *kaddr; - - offset = offset % BITS_PER_PAGE; + unsigned long cur_bit = ((iova - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; + unsigned long last_bit = (((iova + length - 1) - mapped->iova) >> + mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE; do { - unsigned long size = min(BITS_PER_PAGE - offset, nbits); + unsigned int page_idx = cur_bit / BITS_PER_PAGE; + unsigned int offset = cur_bit % BITS_PER_PAGE; + unsigned int nbits = min(BITS_PER_PAGE - offset, + last_bit - cur_bit + 1); + void *kaddr; kaddr = kmap_local_page(mapped->pages[page_idx]); - bitmap_set(kaddr + page_offset, offset, size); + bitmap_set(kaddr, offset, nbits); kunmap_local(kaddr); - page_offset = offset = 0; - nbits -= size; - page_idx++; - } while (nbits > 0); + cur_bit += nbits; + } while (cur_bit <= last_bit); } EXPORT_SYMBOL_GPL(iova_bitmap_set); diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c index 39eeca18a0f7c884a18827fbe6d4ccf2584920b9..0bba3b05c6c780aa081f2031b5ae38b05d9aefc0 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c @@ -360,8 +360,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, u32 que_iso_state; int ret; - if (migf->total_length < QM_MATCH_SIZE) - return -EINVAL; + if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done) + return 0; if (vf_data->acc_magic != ACC_DEV_MAGIC) { dev_err(dev, "failed to match ACC_DEV_MAGIC\n"); @@ -406,6 +406,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev, } hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state; + hisi_acc_vdev->match_done = true; return 0; } @@ -493,10 +494,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev, struct device *dev = &vf_qm->pdev->dev; int ret; - ret = vf_qm_get_match_data(hisi_acc_vdev, vf_data); - if (ret) - return ret; - if (unlikely(qm_wait_dev_not_ready(vf_qm))) { /* Update state and return with match data */ vf_data->vf_qm_state = QM_NOT_READY; @@ -673,12 +670,6 @@ static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev) struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf; int ret; - /* Check dev compatibility */ - ret = vf_qm_check_match(hisi_acc_vdev, migf); - if (ret) { - dev_err(dev, "failed to match the VF!\n"); - return ret; - } /* Recover data to VF */ ret = vf_qm_load_data(hisi_acc_vdev, migf); if (ret) { @@ -732,6 +723,10 @@ static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *bu *pos += len; done = len; migf->total_length += len; + + ret = vf_qm_check_match(migf->hisi_acc_vdev, migf); + if (ret) + done = -EFAULT; out_unlock: mutex_unlock(&migf->lock); return done; @@ -764,9 +759,58 @@ hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev) stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); + migf->hisi_acc_vdev = hisi_acc_vdev; return migf; } +static long hisi_acc_vf_precopy_ioctl(struct file *filp, + unsigned int cmd, unsigned long arg) +{ + struct hisi_acc_vf_migration_file *migf = filp->private_data; + struct hisi_acc_vf_core_device *hisi_acc_vdev = migf->hisi_acc_vdev; + loff_t *pos = &filp->f_pos; + struct vfio_precopy_info info; + unsigned long minsz; + int ret; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&hisi_acc_vdev->state_mutex); + if (hisi_acc_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY) { + mutex_unlock(&hisi_acc_vdev->state_mutex); + return -EINVAL; + } + + mutex_lock(&migf->lock); + + if (migf->disabled) { + ret = -ENODEV; + goto out; + } + + if (*pos > migf->total_length) { + ret = -EINVAL; + goto out; + } + + info.dirty_bytes = 0; + info.initial_bytes = migf->total_length - *pos; + + ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0; +out: + mutex_unlock(&migf->lock); + mutex_unlock(&hisi_acc_vdev->state_mutex); + return ret; +} + static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { @@ -807,12 +851,14 @@ out_unlock: static const struct file_operations hisi_acc_vf_save_fops = { .owner = THIS_MODULE, .read = hisi_acc_vf_save_read, + .unlocked_ioctl = hisi_acc_vf_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = hisi_acc_vf_release_file, .llseek = no_llseek, }; static struct hisi_acc_vf_migration_file * -hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) +hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev) { struct hisi_acc_vf_migration_file *migf; int ret; @@ -832,8 +878,9 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); + migf->hisi_acc_vdev = hisi_acc_vdev; - ret = vf_qm_state_save(hisi_acc_vdev, migf); + ret = vf_qm_get_match_data(hisi_acc_vdev, &migf->vf_data); if (ret) { fput(migf->filp); return ERR_PTR(ret); @@ -842,6 +889,44 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) return migf; } +static struct hisi_acc_vf_migration_file * +hisi_acc_vf_pre_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev) +{ + struct hisi_acc_vf_migration_file *migf; + + migf = hisi_acc_open_saving_migf(hisi_acc_vdev); + if (IS_ERR(migf)) + return migf; + + migf->total_length = QM_MATCH_SIZE; + return migf; +} + +static struct hisi_acc_vf_migration_file * +hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev, bool open) +{ + int ret; + struct hisi_acc_vf_migration_file *migf = NULL; + + if (open) { + /* + * Userspace didn't use PRECOPY support. Hence saving_migf + * is not opened yet. + */ + migf = hisi_acc_open_saving_migf(hisi_acc_vdev); + if (IS_ERR(migf)) + return migf; + } else { + migf = hisi_acc_vdev->saving_migf; + } + + ret = vf_qm_state_save(hisi_acc_vdev, migf); + if (ret) + return ERR_PTR(ret); + + return open ? migf : NULL; +} + static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev) { struct device *dev = &hisi_acc_vdev->vf_dev->dev; @@ -869,6 +954,31 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev, u32 cur = hisi_acc_vdev->mig_state; int ret; + if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) { + struct hisi_acc_vf_migration_file *migf; + + migf = hisi_acc_vf_pre_copy(hisi_acc_vdev); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + hisi_acc_vdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_STOP_COPY) { + struct hisi_acc_vf_migration_file *migf; + + ret = hisi_acc_vf_stop_device(hisi_acc_vdev); + if (ret) + return ERR_PTR(ret); + + migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, false); + if (IS_ERR(migf)) + return ERR_CAST(migf); + + return NULL; + } + if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) { ret = hisi_acc_vf_stop_device(hisi_acc_vdev); if (ret) @@ -879,7 +989,7 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev, if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { struct hisi_acc_vf_migration_file *migf; - migf = hisi_acc_vf_stop_copy(hisi_acc_vdev); + migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, true); if (IS_ERR(migf)) return ERR_CAST(migf); get_file(migf->filp); @@ -911,6 +1021,11 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev, return NULL; } + if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) { + hisi_acc_vf_disable_fds(hisi_acc_vdev); + return NULL; + } + if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) { hisi_acc_vf_start_device(hisi_acc_vdev); return NULL; @@ -957,6 +1072,14 @@ hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev, return res; } +static int +hisi_acc_vfio_pci_get_data_size(struct vfio_device *vdev, + unsigned long *stop_copy_length) +{ + *stop_copy_length = sizeof(struct acc_vf_data); + return 0; +} + static int hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev, enum vfio_device_mig_state *curr_state) @@ -1213,6 +1336,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev) static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = { .migration_set_state = hisi_acc_vfio_pci_set_device_state, .migration_get_state = hisi_acc_vfio_pci_get_device_state, + .migration_get_data_size = hisi_acc_vfio_pci_get_data_size, }; static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev) @@ -1227,7 +1351,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev) hisi_acc_vdev->vf_dev = pdev; mutex_init(&hisi_acc_vdev->state_mutex); - core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY; + core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY; core_vdev->mig_ops = &hisi_acc_vfio_pci_migrn_state_ops; return vfio_pci_core_init_dev(core_vdev); @@ -1246,6 +1370,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = { .mmap = hisi_acc_vfio_pci_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { @@ -1261,6 +1388,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h index 67343325b320165499a6e17bea7e6d9cc9b7e7c3..dcabfeec6ca19d4334e746536c5e15a3572972d5 100644 --- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h +++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h @@ -91,12 +91,14 @@ struct hisi_acc_vf_migration_file { struct mutex lock; bool disabled; + struct hisi_acc_vf_core_device *hisi_acc_vdev; struct acc_vf_data vf_data; size_t total_length; }; struct hisi_acc_vf_core_device { struct vfio_pci_core_device core_device; + u8 match_done:1; u8 deferred_reset:1; /* For migration state */ struct mutex state_mutex; diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c index c604b70437a5d5e56f6f8d71808a8c61c1aa0490..64e68d13cb98fc1812b8f096cc23c80ef2919bc2 100644 --- a/drivers/vfio/pci/mlx5/cmd.c +++ b/drivers/vfio/pci/mlx5/cmd.c @@ -14,18 +14,36 @@ _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) { + struct mlx5_vf_migration_file *migf = mvdev->saving_migf; u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {}; u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {}; + int err; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; + /* + * In case PRE_COPY is used, saving_migf is exposed while the device is + * running. Make sure to run only once there is no active save command. + * Running both in parallel, might end-up with a failure in the save + * command once it will try to turn on 'tracking' on a suspended device. + */ + if (migf) { + err = wait_for_completion_interruptible(&migf->save_comp); + if (err) + return err; + } + MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA); MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(suspend_vhca_in, in, op_mod, op_mod); - return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out); + err = mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out); + if (migf) + complete(&migf->save_comp); + + return err; } int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) @@ -45,23 +63,54 @@ int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod) } int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, - size_t *state_size) + size_t *state_size, u8 query_flags) { u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {}; u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {}; + bool inc = query_flags & MLX5VF_QUERY_INC; int ret; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; + /* + * In case PRE_COPY is used, saving_migf is exposed while device is + * running. Make sure to run only once there is no active save command. + * Running both in parallel, might end-up with a failure in the + * incremental query command on un-tracked vhca. + */ + if (inc) { + ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp); + if (ret) + return ret; + if (mvdev->saving_migf->state == + MLX5_MIGF_STATE_PRE_COPY_ERROR) { + /* + * In case we had a PRE_COPY error, only query full + * image for final image + */ + if (!(query_flags & MLX5VF_QUERY_FINAL)) { + *state_size = 0; + complete(&mvdev->saving_migf->save_comp); + return 0; + } + query_flags &= ~MLX5VF_QUERY_INC; + } + } + MLX5_SET(query_vhca_migration_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE); MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id); MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0); + MLX5_SET(query_vhca_migration_state_in, in, incremental, + query_flags & MLX5VF_QUERY_INC); ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in, out); + if (inc) + complete(&mvdev->saving_migf->save_comp); + if (ret) return ret; @@ -173,6 +222,11 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization)) mvdev->core_device.vdev.log_ops = log_ops; + if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) && + MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state)) + mvdev->core_device.vdev.migration_flags |= + VFIO_MIGRATION_PRE_COPY; + end: mlx5_vf_put_core_dev(mvdev->mdev); } @@ -210,11 +264,11 @@ err_exec: } static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, - struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf, struct mlx5_vhca_recv_buf *recv_buf, u32 *mkey) { - size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) : + size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) : recv_buf->npages; int err = 0, inlen; __be64 *mtt; @@ -232,10 +286,10 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, DIV_ROUND_UP(npages, 2)); mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); - if (migf) { + if (buf) { struct sg_dma_page_iter dma_iter; - for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0) + for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0) *mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter)); } else { int i; @@ -255,35 +309,195 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn, MLX5_SET(mkc, mkc, qpn, 0xffffff); MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2)); - MLX5_SET64(mkc, mkc, len, - migf ? migf->total_length : (npages * PAGE_SIZE)); + MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE); err = mlx5_core_create_mkey(mdev, mkey, in, inlen); kvfree(in); return err; } +static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf) +{ + struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev; + struct mlx5_core_dev *mdev = mvdev->mdev; + int ret; + + lockdep_assert_held(&mvdev->state_mutex); + if (mvdev->mdev_detach) + return -ENOTCONN; + + if (buf->dmaed || !buf->allocated_length) + return -EINVAL; + + ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0); + if (ret) + return ret; + + ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey); + if (ret) + goto err; + + buf->dmaed = true; + + return 0; +err: + dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0); + return ret; +} + +void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf) +{ + struct mlx5_vf_migration_file *migf = buf->migf; + struct sg_page_iter sg_iter; + + lockdep_assert_held(&migf->mvdev->state_mutex); + WARN_ON(migf->mvdev->mdev_detach); + + if (buf->dmaed) { + mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey); + dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt, + buf->dma_dir, 0); + } + + /* Undo alloc_pages_bulk_array() */ + for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0) + __free_page(sg_page_iter_page(&sg_iter)); + sg_free_append_table(&buf->table); + kfree(buf); +} + +struct mlx5_vhca_data_buffer * +mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, + enum dma_data_direction dma_dir) +{ + struct mlx5_vhca_data_buffer *buf; + int ret; + + buf = kzalloc(sizeof(*buf), GFP_KERNEL); + if (!buf) + return ERR_PTR(-ENOMEM); + + buf->dma_dir = dma_dir; + buf->migf = migf; + if (length) { + ret = mlx5vf_add_migration_pages(buf, + DIV_ROUND_UP_ULL(length, PAGE_SIZE)); + if (ret) + goto end; + + if (dma_dir != DMA_NONE) { + ret = mlx5vf_dma_data_buffer(buf); + if (ret) + goto end; + } + } + + return buf; +end: + mlx5vf_free_data_buffer(buf); + return ERR_PTR(ret); +} + +void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf) +{ + spin_lock_irq(&buf->migf->list_lock); + list_add_tail(&buf->buf_elm, &buf->migf->avail_list); + spin_unlock_irq(&buf->migf->list_lock); +} + +struct mlx5_vhca_data_buffer * +mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, enum dma_data_direction dma_dir) +{ + struct mlx5_vhca_data_buffer *buf, *temp_buf; + struct list_head free_list; + + lockdep_assert_held(&migf->mvdev->state_mutex); + if (migf->mvdev->mdev_detach) + return ERR_PTR(-ENOTCONN); + + INIT_LIST_HEAD(&free_list); + + spin_lock_irq(&migf->list_lock); + list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) { + if (buf->dma_dir == dma_dir) { + list_del_init(&buf->buf_elm); + if (buf->allocated_length >= length) { + spin_unlock_irq(&migf->list_lock); + goto found; + } + /* + * Prevent holding redundant buffers. Put in a free + * list and call at the end not under the spin lock + * (&migf->list_lock) to mlx5vf_free_data_buffer which + * might sleep. + */ + list_add(&buf->buf_elm, &free_list); + } + } + spin_unlock_irq(&migf->list_lock); + buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir); + +found: + while ((temp_buf = list_first_entry_or_null(&free_list, + struct mlx5_vhca_data_buffer, buf_elm))) { + list_del(&temp_buf->buf_elm); + mlx5vf_free_data_buffer(temp_buf); + } + + return buf; +} + void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work) { struct mlx5vf_async_data *async_data = container_of(_work, struct mlx5vf_async_data, work); struct mlx5_vf_migration_file *migf = container_of(async_data, struct mlx5_vf_migration_file, async_data); - struct mlx5_core_dev *mdev = migf->mvdev->mdev; mutex_lock(&migf->lock); if (async_data->status) { - migf->is_err = true; + mlx5vf_put_data_buffer(async_data->buf); + if (async_data->header_buf) + mlx5vf_put_data_buffer(async_data->header_buf); + if (async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR) + migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR; + else + migf->state = MLX5_MIGF_STATE_ERROR; wake_up_interruptible(&migf->poll_wait); } mutex_unlock(&migf->lock); - - mlx5_core_destroy_mkey(mdev, async_data->mkey); - dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); - mlx5_core_dealloc_pd(mdev, async_data->pdn); kvfree(async_data->out); + complete(&migf->save_comp); fput(migf->filp); } +static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf, + size_t image_size) +{ + struct mlx5_vf_migration_file *migf = header_buf->migf; + struct mlx5_vf_migration_header header = {}; + unsigned long flags; + struct page *page; + u8 *to_buff; + + header.image_size = cpu_to_le64(image_size); + page = mlx5vf_get_migration_page(header_buf, 0); + if (!page) + return -EINVAL; + to_buff = kmap_local_page(page); + memcpy(to_buff, &header, sizeof(header)); + kunmap_local(to_buff); + header_buf->length = sizeof(header); + header_buf->header_image_size = image_size; + header_buf->start_pos = header_buf->migf->max_pos; + migf->max_pos += header_buf->length; + spin_lock_irqsave(&migf->list_lock, flags); + list_add_tail(&header_buf->buf_elm, &migf->buf_list); + spin_unlock_irqrestore(&migf->list_lock, flags); + return 0; +} + static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) { struct mlx5vf_async_data *async_data = container_of(context, @@ -292,67 +506,96 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context) struct mlx5_vf_migration_file, async_data); if (!status) { - WRITE_ONCE(migf->total_length, - MLX5_GET(save_vhca_state_out, async_data->out, - actual_image_size)); + size_t image_size; + unsigned long flags; + + image_size = MLX5_GET(save_vhca_state_out, async_data->out, + actual_image_size); + if (async_data->header_buf) { + status = add_buf_header(async_data->header_buf, image_size); + if (status) + goto err; + } + async_data->buf->length = image_size; + async_data->buf->start_pos = migf->max_pos; + migf->max_pos += async_data->buf->length; + spin_lock_irqsave(&migf->list_lock, flags); + list_add_tail(&async_data->buf->buf_elm, &migf->buf_list); + spin_unlock_irqrestore(&migf->list_lock, flags); + migf->state = async_data->last_chunk ? + MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY; wake_up_interruptible(&migf->poll_wait); } +err: /* * The error and the cleanup flows can't run from an * interrupt context */ + if (status == -EREMOTEIO) + status = MLX5_GET(save_vhca_state_out, async_data->out, status); async_data->status = status; queue_work(migf->mvdev->cb_wq, &async_data->work); } int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf) + struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf, bool inc, + bool track) { u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out); u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; + struct mlx5_vhca_data_buffer *header_buf = NULL; struct mlx5vf_async_data *async_data; - struct mlx5_core_dev *mdev; - u32 pdn, mkey; int err; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; - mdev = mvdev->mdev; - err = mlx5_core_alloc_pd(mdev, &pdn); + err = wait_for_completion_interruptible(&migf->save_comp); if (err) return err; - err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, - 0); - if (err) - goto err_dma_map; - - err = _create_mkey(mdev, pdn, migf, NULL, &mkey); - if (err) - goto err_create_mkey; + if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR) + /* + * In case we had a PRE_COPY error, SAVE is triggered only for + * the final image, read device full image. + */ + inc = false; MLX5_SET(save_vhca_state_in, in, opcode, MLX5_CMD_OP_SAVE_VHCA_STATE); MLX5_SET(save_vhca_state_in, in, op_mod, 0); MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id); - MLX5_SET(save_vhca_state_in, in, mkey, mkey); - MLX5_SET(save_vhca_state_in, in, size, migf->total_length); + MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey); + MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length); + MLX5_SET(save_vhca_state_in, in, incremental, inc); + MLX5_SET(save_vhca_state_in, in, set_track, track); async_data = &migf->async_data; + async_data->buf = buf; + async_data->last_chunk = !track; async_data->out = kvzalloc(out_size, GFP_KERNEL); if (!async_data->out) { err = -ENOMEM; goto err_out; } - /* no data exists till the callback comes back */ - migf->total_length = 0; + if (MLX5VF_PRE_COPY_SUPP(mvdev)) { + header_buf = mlx5vf_get_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(header_buf)) { + err = PTR_ERR(header_buf); + goto err_free; + } + } + + if (async_data->last_chunk) + migf->state = MLX5_MIGF_STATE_SAVE_LAST; + + async_data->header_buf = header_buf; get_file(migf->filp); - async_data->mkey = mkey; - async_data->pdn = pdn; err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in), async_data->out, out_size, mlx5vf_save_callback, @@ -363,68 +606,92 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, return 0; err_exec: + if (header_buf) + mlx5vf_put_data_buffer(header_buf); fput(migf->filp); +err_free: kvfree(async_data->out); err_out: - mlx5_core_destroy_mkey(mdev, mkey); -err_create_mkey: - dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0); -err_dma_map: - mlx5_core_dealloc_pd(mdev, pdn); + complete(&migf->save_comp); return err; } int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf) + struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf) { - struct mlx5_core_dev *mdev; - u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {}; - u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {}; - u32 pdn, mkey; + u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {}; + u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {}; int err; lockdep_assert_held(&mvdev->state_mutex); if (mvdev->mdev_detach) return -ENOTCONN; - mutex_lock(&migf->lock); - if (!migf->total_length) { - err = -EINVAL; - goto end; + if (!buf->dmaed) { + err = mlx5vf_dma_data_buffer(buf); + if (err) + return err; } - mdev = mvdev->mdev; - err = mlx5_core_alloc_pd(mdev, &pdn); - if (err) - goto end; - - err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); - if (err) - goto err_reg; - - err = _create_mkey(mdev, pdn, migf, NULL, &mkey); - if (err) - goto err_mkey; - MLX5_SET(load_vhca_state_in, in, opcode, MLX5_CMD_OP_LOAD_VHCA_STATE); MLX5_SET(load_vhca_state_in, in, op_mod, 0); MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id); - MLX5_SET(load_vhca_state_in, in, mkey, mkey); - MLX5_SET(load_vhca_state_in, in, size, migf->total_length); + MLX5_SET(load_vhca_state_in, in, mkey, buf->mkey); + MLX5_SET(load_vhca_state_in, in, size, buf->length); + return mlx5_cmd_exec_inout(mvdev->mdev, load_vhca_state, in, out); +} - err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out); +int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf) +{ + int err; - mlx5_core_destroy_mkey(mdev, mkey); -err_mkey: - dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0); -err_reg: - mlx5_core_dealloc_pd(mdev, pdn); -end: - mutex_unlock(&migf->lock); + lockdep_assert_held(&migf->mvdev->state_mutex); + if (migf->mvdev->mdev_detach) + return -ENOTCONN; + + err = mlx5_core_alloc_pd(migf->mvdev->mdev, &migf->pdn); return err; } +void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf) +{ + lockdep_assert_held(&migf->mvdev->state_mutex); + if (migf->mvdev->mdev_detach) + return; + + mlx5_core_dealloc_pd(migf->mvdev->mdev, migf->pdn); +} + +void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf) +{ + struct mlx5_vhca_data_buffer *entry; + + lockdep_assert_held(&migf->mvdev->state_mutex); + WARN_ON(migf->mvdev->mdev_detach); + + if (migf->buf) { + mlx5vf_free_data_buffer(migf->buf); + migf->buf = NULL; + } + + if (migf->buf_header) { + mlx5vf_free_data_buffer(migf->buf_header); + migf->buf_header = NULL; + } + + list_splice(&migf->avail_list, &migf->buf_list); + + while ((entry = list_first_entry_or_null(&migf->buf_list, + struct mlx5_vhca_data_buffer, buf_elm))) { + list_del(&entry->buf_elm); + mlx5vf_free_data_buffer(entry); + } + + mlx5vf_cmd_dealloc_pd(migf); +} + static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes, u32 req_nodes) { diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h index 921d5720a1e57b87a901b8374ac6f4870bb1bafb..5483171d57ad35c01b0010d2ce9e5d2a11169545 100644 --- a/drivers/vfio/pci/mlx5/cmd.h +++ b/drivers/vfio/pci/mlx5/cmd.h @@ -12,31 +12,74 @@ #include #include +#define MLX5VF_PRE_COPY_SUPP(mvdev) \ + ((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY) + +enum mlx5_vf_migf_state { + MLX5_MIGF_STATE_ERROR = 1, + MLX5_MIGF_STATE_PRE_COPY_ERROR, + MLX5_MIGF_STATE_PRE_COPY, + MLX5_MIGF_STATE_SAVE_LAST, + MLX5_MIGF_STATE_COMPLETE, +}; + +enum mlx5_vf_load_state { + MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER, + MLX5_VF_LOAD_STATE_READ_HEADER, + MLX5_VF_LOAD_STATE_PREP_IMAGE, + MLX5_VF_LOAD_STATE_READ_IMAGE, + MLX5_VF_LOAD_STATE_LOAD_IMAGE, +}; + +struct mlx5_vf_migration_header { + __le64 image_size; + /* For future use in case we may need to change the kernel protocol */ + __le64 flags; +}; + +struct mlx5_vhca_data_buffer { + struct sg_append_table table; + loff_t start_pos; + u64 length; + u64 allocated_length; + u64 header_image_size; + u32 mkey; + enum dma_data_direction dma_dir; + u8 dmaed:1; + struct list_head buf_elm; + struct mlx5_vf_migration_file *migf; + /* Optimize mlx5vf_get_migration_page() for sequential access */ + struct scatterlist *last_offset_sg; + unsigned int sg_last_entry; + unsigned long last_offset; +}; + struct mlx5vf_async_data { struct mlx5_async_work cb_work; struct work_struct work; + struct mlx5_vhca_data_buffer *buf; + struct mlx5_vhca_data_buffer *header_buf; int status; - u32 pdn; - u32 mkey; + u8 last_chunk:1; void *out; }; struct mlx5_vf_migration_file { struct file *filp; struct mutex lock; - u8 disabled:1; - u8 is_err:1; + enum mlx5_vf_migf_state state; - struct sg_append_table table; - size_t total_length; - size_t allocated_length; - - /* Optimize mlx5vf_get_migration_page() for sequential access */ - struct scatterlist *last_offset_sg; - unsigned int sg_last_entry; - unsigned long last_offset; + enum mlx5_vf_load_state load_state; + u32 pdn; + loff_t max_pos; + struct mlx5_vhca_data_buffer *buf; + struct mlx5_vhca_data_buffer *buf_header; + spinlock_t list_lock; + struct list_head buf_list; + struct list_head avail_list; struct mlx5vf_pci_core_device *mvdev; wait_queue_head_t poll_wait; + struct completion save_comp; struct mlx5_async_ctx async_ctx; struct mlx5vf_async_data async_data; }; @@ -113,19 +156,42 @@ struct mlx5vf_pci_core_device { struct mlx5_core_dev *mdev; }; +enum { + MLX5VF_QUERY_INC = (1UL << 0), + MLX5VF_QUERY_FINAL = (1UL << 1), +}; + int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod); int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev, - size_t *state_size); + size_t *state_size, u8 query_flags); void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev, const struct vfio_migration_ops *mig_ops, const struct vfio_log_ops *log_ops); void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev); int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf); + struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf, bool inc, + bool track); int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev, - struct mlx5_vf_migration_file *migf); + struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *buf); +int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf); +void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf); +void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf); +struct mlx5_vhca_data_buffer * +mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, enum dma_data_direction dma_dir); +void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf); +struct mlx5_vhca_data_buffer * +mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf, + size_t length, enum dma_data_direction dma_dir); +void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf); +int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages); +struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, + unsigned long offset); void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev); void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work); diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c index fd6ccb8454a24a496c351fb415f96ee2fd9361a5..9feb89c6d939daccbaabfcb76a147d15603db4f0 100644 --- a/drivers/vfio/pci/mlx5/main.c +++ b/drivers/vfio/pci/mlx5/main.c @@ -32,8 +32,8 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev) core_device); } -static struct page * -mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, +struct page * +mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf, unsigned long offset) { unsigned long cur_offset = 0; @@ -41,20 +41,20 @@ mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, unsigned int i; /* All accesses are sequential */ - if (offset < migf->last_offset || !migf->last_offset_sg) { - migf->last_offset = 0; - migf->last_offset_sg = migf->table.sgt.sgl; - migf->sg_last_entry = 0; + if (offset < buf->last_offset || !buf->last_offset_sg) { + buf->last_offset = 0; + buf->last_offset_sg = buf->table.sgt.sgl; + buf->sg_last_entry = 0; } - cur_offset = migf->last_offset; + cur_offset = buf->last_offset; - for_each_sg(migf->last_offset_sg, sg, - migf->table.sgt.orig_nents - migf->sg_last_entry, i) { + for_each_sg(buf->last_offset_sg, sg, + buf->table.sgt.orig_nents - buf->sg_last_entry, i) { if (offset < sg->length + cur_offset) { - migf->last_offset_sg = sg; - migf->sg_last_entry += i; - migf->last_offset = cur_offset; + buf->last_offset_sg = sg; + buf->sg_last_entry += i; + buf->last_offset = cur_offset; return nth_page(sg_page(sg), (offset - cur_offset) / PAGE_SIZE); } @@ -63,8 +63,8 @@ mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf, return NULL; } -static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf, - unsigned int npages) +int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf, + unsigned int npages) { unsigned int to_alloc = npages; struct page **page_list; @@ -85,13 +85,13 @@ static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf, } to_alloc -= filled; ret = sg_alloc_append_table_from_pages( - &migf->table, page_list, filled, 0, + &buf->table, page_list, filled, 0, filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC, GFP_KERNEL); if (ret) goto err; - migf->allocated_length += filled * PAGE_SIZE; + buf->allocated_length += filled * PAGE_SIZE; /* clean input for another bulk allocation */ memset(page_list, 0, filled * sizeof(*page_list)); to_fill = min_t(unsigned int, to_alloc, @@ -108,16 +108,8 @@ err: static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf) { - struct sg_page_iter sg_iter; - mutex_lock(&migf->lock); - /* Undo alloc_pages_bulk_array() */ - for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0) - __free_page(sg_page_iter_page(&sg_iter)); - sg_free_append_table(&migf->table); - migf->disabled = true; - migf->total_length = 0; - migf->allocated_length = 0; + migf->state = MLX5_MIGF_STATE_ERROR; migf->filp->f_pos = 0; mutex_unlock(&migf->lock); } @@ -132,10 +124,91 @@ static int mlx5vf_release_file(struct inode *inode, struct file *filp) return 0; } +static struct mlx5_vhca_data_buffer * +mlx5vf_get_data_buff_from_pos(struct mlx5_vf_migration_file *migf, loff_t pos, + bool *end_of_data) +{ + struct mlx5_vhca_data_buffer *buf; + bool found = false; + + *end_of_data = false; + spin_lock_irq(&migf->list_lock); + if (list_empty(&migf->buf_list)) { + *end_of_data = true; + goto end; + } + + buf = list_first_entry(&migf->buf_list, struct mlx5_vhca_data_buffer, + buf_elm); + if (pos >= buf->start_pos && + pos < buf->start_pos + buf->length) { + found = true; + goto end; + } + + /* + * As we use a stream based FD we may expect having the data always + * on first chunk + */ + migf->state = MLX5_MIGF_STATE_ERROR; + +end: + spin_unlock_irq(&migf->list_lock); + return found ? buf : NULL; +} + +static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf, + char __user **buf, size_t *len, loff_t *pos) +{ + unsigned long offset; + ssize_t done = 0; + size_t copy_len; + + copy_len = min_t(size_t, + vhca_buf->start_pos + vhca_buf->length - *pos, *len); + while (copy_len) { + size_t page_offset; + struct page *page; + size_t page_len; + u8 *from_buff; + int ret; + + offset = *pos - vhca_buf->start_pos; + page_offset = offset % PAGE_SIZE; + offset -= page_offset; + page = mlx5vf_get_migration_page(vhca_buf, offset); + if (!page) + return -EINVAL; + page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset); + from_buff = kmap_local_page(page); + ret = copy_to_user(*buf, from_buff + page_offset, page_len); + kunmap_local(from_buff); + if (ret) + return -EFAULT; + *pos += page_len; + *len -= page_len; + *buf += page_len; + done += page_len; + copy_len -= page_len; + } + + if (*pos >= vhca_buf->start_pos + vhca_buf->length) { + spin_lock_irq(&vhca_buf->migf->list_lock); + list_del_init(&vhca_buf->buf_elm); + list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list); + spin_unlock_irq(&vhca_buf->migf->list_lock); + } + + return done; +} + static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, loff_t *pos) { struct mlx5_vf_migration_file *migf = filp->private_data; + struct mlx5_vhca_data_buffer *vhca_buf; + bool first_loop_call = true; + bool end_of_data; ssize_t done = 0; if (pos) @@ -144,52 +217,56 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len, if (!(filp->f_flags & O_NONBLOCK)) { if (wait_event_interruptible(migf->poll_wait, - READ_ONCE(migf->total_length) || migf->is_err)) + !list_empty(&migf->buf_list) || + migf->state == MLX5_MIGF_STATE_ERROR || + migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR || + migf->state == MLX5_MIGF_STATE_PRE_COPY || + migf->state == MLX5_MIGF_STATE_COMPLETE)) return -ERESTARTSYS; } mutex_lock(&migf->lock); - if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) { - done = -EAGAIN; - goto out_unlock; - } - if (*pos > migf->total_length) { - done = -EINVAL; - goto out_unlock; - } - if (migf->disabled || migf->is_err) { + if (migf->state == MLX5_MIGF_STATE_ERROR) { done = -ENODEV; goto out_unlock; } - len = min_t(size_t, migf->total_length - *pos, len); while (len) { - size_t page_offset; - struct page *page; - size_t page_len; - u8 *from_buff; - int ret; + ssize_t count; + + vhca_buf = mlx5vf_get_data_buff_from_pos(migf, *pos, + &end_of_data); + if (first_loop_call) { + first_loop_call = false; + /* Temporary end of file as part of PRE_COPY */ + if (end_of_data && (migf->state == MLX5_MIGF_STATE_PRE_COPY || + migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)) { + done = -ENOMSG; + goto out_unlock; + } + + if (end_of_data && migf->state != MLX5_MIGF_STATE_COMPLETE) { + if (filp->f_flags & O_NONBLOCK) { + done = -EAGAIN; + goto out_unlock; + } + } + } + + if (end_of_data) + goto out_unlock; - page_offset = (*pos) % PAGE_SIZE; - page = mlx5vf_get_migration_page(migf, *pos - page_offset); - if (!page) { - if (done == 0) - done = -EINVAL; + if (!vhca_buf) { + done = -EINVAL; goto out_unlock; } - page_len = min_t(size_t, len, PAGE_SIZE - page_offset); - from_buff = kmap_local_page(page); - ret = copy_to_user(buf, from_buff + page_offset, page_len); - kunmap_local(from_buff); - if (ret) { - done = -EFAULT; + count = mlx5vf_buf_read(vhca_buf, &buf, &len, pos); + if (count < 0) { + done = count; goto out_unlock; } - *pos += page_len; - len -= page_len; - done += page_len; - buf += page_len; + done += count; } out_unlock: @@ -206,27 +283,188 @@ static __poll_t mlx5vf_save_poll(struct file *filp, poll_wait(filp, &migf->poll_wait, wait); mutex_lock(&migf->lock); - if (migf->disabled || migf->is_err) + if (migf->state == MLX5_MIGF_STATE_ERROR) pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - else if (READ_ONCE(migf->total_length)) + else if (!list_empty(&migf->buf_list) || + migf->state == MLX5_MIGF_STATE_COMPLETE) pollflags = EPOLLIN | EPOLLRDNORM; mutex_unlock(&migf->lock); return pollflags; } +/* + * FD is exposed and user can use it after receiving an error. + * Mark migf in error, and wake the user. + */ +static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf) +{ + migf->state = MLX5_MIGF_STATE_ERROR; + wake_up_interruptible(&migf->poll_wait); +} + +static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct mlx5_vf_migration_file *migf = filp->private_data; + struct mlx5vf_pci_core_device *mvdev = migf->mvdev; + struct mlx5_vhca_data_buffer *buf; + struct vfio_precopy_info info = {}; + loff_t *pos = &filp->f_pos; + unsigned long minsz; + size_t inc_length = 0; + bool end_of_data; + int ret; + + if (cmd != VFIO_MIG_GET_PRECOPY_INFO) + return -ENOTTY; + + minsz = offsetofend(struct vfio_precopy_info, dirty_bytes); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + mutex_lock(&mvdev->state_mutex); + if (mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY && + mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) { + ret = -EINVAL; + goto err_state_unlock; + } + + /* + * We can't issue a SAVE command when the device is suspended, so as + * part of VFIO_DEVICE_STATE_PRE_COPY_P2P no reason to query for extra + * bytes that can't be read. + */ + if (mvdev->mig_state == VFIO_DEVICE_STATE_PRE_COPY) { + /* + * Once the query returns it's guaranteed that there is no + * active SAVE command. + * As so, the other code below is safe with the proper locks. + */ + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length, + MLX5VF_QUERY_INC); + if (ret) + goto err_state_unlock; + } + + mutex_lock(&migf->lock); + if (migf->state == MLX5_MIGF_STATE_ERROR) { + ret = -ENODEV; + goto err_migf_unlock; + } + + buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data); + if (buf) { + if (buf->start_pos == 0) { + info.initial_bytes = buf->header_image_size - *pos; + } else if (buf->start_pos == + sizeof(struct mlx5_vf_migration_header)) { + /* First data buffer following the header */ + info.initial_bytes = buf->start_pos + + buf->length - *pos; + } else { + info.dirty_bytes = buf->start_pos + buf->length - *pos; + } + } else { + if (!end_of_data) { + ret = -EINVAL; + goto err_migf_unlock; + } + + info.dirty_bytes = inc_length; + } + + if (!end_of_data || !inc_length) { + mutex_unlock(&migf->lock); + goto done; + } + + mutex_unlock(&migf->lock); + /* + * We finished transferring the current state and the device has a + * dirty state, save a new state to be ready for. + */ + buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + mlx5vf_mark_err(migf); + goto err_state_unlock; + } + + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, true); + if (ret) { + mlx5vf_mark_err(migf); + mlx5vf_put_data_buffer(buf); + goto err_state_unlock; + } + +done: + mlx5vf_state_mutex_unlock(mvdev); + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + return 0; + +err_migf_unlock: + mutex_unlock(&migf->lock); +err_state_unlock: + mlx5vf_state_mutex_unlock(mvdev); + return ret; +} + static const struct file_operations mlx5vf_save_fops = { .owner = THIS_MODULE, .read = mlx5vf_save_read, .poll = mlx5vf_save_poll, + .unlocked_ioctl = mlx5vf_precopy_ioctl, + .compat_ioctl = compat_ptr_ioctl, .release = mlx5vf_release_file, .llseek = no_llseek, }; +static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev) +{ + struct mlx5_vf_migration_file *migf = mvdev->saving_migf; + struct mlx5_vhca_data_buffer *buf; + size_t length; + int ret; + + if (migf->state == MLX5_MIGF_STATE_ERROR) + return -ENODEV; + + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, + MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL); + if (ret) + goto err; + + buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto err; + } + + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false); + if (ret) + goto err_save; + + return 0; + +err_save: + mlx5vf_put_data_buffer(buf); +err: + mlx5vf_mark_err(migf); + return ret; +} + static struct mlx5_vf_migration_file * -mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) +mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track) { struct mlx5_vf_migration_file *migf; + struct mlx5_vhca_data_buffer *buf; + size_t length; int ret; migf = kzalloc(sizeof(*migf), GFP_KERNEL); @@ -236,43 +474,211 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev) migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf, O_RDONLY); if (IS_ERR(migf->filp)) { - int err = PTR_ERR(migf->filp); - - kfree(migf); - return ERR_PTR(err); + ret = PTR_ERR(migf->filp); + goto end; } + migf->mvdev = mvdev; + ret = mlx5vf_cmd_alloc_pd(migf); + if (ret) + goto out_free; + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); init_waitqueue_head(&migf->poll_wait); + init_completion(&migf->save_comp); + /* + * save_comp is being used as a binary semaphore built from + * a completion. A normal mutex cannot be used because the lock is + * passed between kernel threads and lockdep can't model this. + */ + complete(&migf->save_comp); mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx); INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb); - ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, - &migf->total_length); + INIT_LIST_HEAD(&migf->buf_list); + INIT_LIST_HEAD(&migf->avail_list); + spin_lock_init(&migf->list_lock); + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, 0); if (ret) - goto out_free; + goto out_pd; - ret = mlx5vf_add_migration_pages( - migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE)); - if (ret) - goto out_free; + buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_pd; + } - migf->mvdev = mvdev; - ret = mlx5vf_cmd_save_vhca_state(mvdev, migf); + ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, track); if (ret) - goto out_free; + goto out_save; return migf; +out_save: + mlx5vf_free_data_buffer(buf); +out_pd: + mlx5vf_cmd_dealloc_pd(migf); out_free: fput(migf->filp); +end: + kfree(migf); return ERR_PTR(ret); } +static int +mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf, + const char __user **buf, size_t *len, + loff_t *pos, ssize_t *done) +{ + unsigned long offset; + size_t page_offset; + struct page *page; + size_t page_len; + u8 *to_buff; + int ret; + + offset = *pos - vhca_buf->start_pos; + page_offset = offset % PAGE_SIZE; + + page = mlx5vf_get_migration_page(vhca_buf, offset - page_offset); + if (!page) + return -EINVAL; + page_len = min_t(size_t, *len, PAGE_SIZE - page_offset); + to_buff = kmap_local_page(page); + ret = copy_from_user(to_buff + page_offset, *buf, page_len); + kunmap_local(to_buff); + if (ret) + return -EFAULT; + + *pos += page_len; + *done += page_len; + *buf += page_len; + *len -= page_len; + vhca_buf->length += page_len; + return 0; +} + +static int +mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf, + loff_t requested_length, + const char __user **buf, size_t *len, + loff_t *pos, ssize_t *done) +{ + int ret; + + if (requested_length > MAX_MIGRATION_SIZE) + return -ENOMEM; + + if (vhca_buf->allocated_length < requested_length) { + ret = mlx5vf_add_migration_pages( + vhca_buf, + DIV_ROUND_UP(requested_length - vhca_buf->allocated_length, + PAGE_SIZE)); + if (ret) + return ret; + } + + while (*len) { + ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos, + done); + if (ret) + return ret; + } + + return 0; +} + +static ssize_t +mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *vhca_buf, + size_t image_size, const char __user **buf, + size_t *len, loff_t *pos, ssize_t *done, + bool *has_work) +{ + size_t copy_len, to_copy; + int ret; + + to_copy = min_t(size_t, *len, image_size - vhca_buf->length); + copy_len = to_copy; + while (to_copy) { + ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos, + done); + if (ret) + return ret; + } + + *len -= copy_len; + if (vhca_buf->length == image_size) { + migf->load_state = MLX5_VF_LOAD_STATE_LOAD_IMAGE; + migf->max_pos += image_size; + *has_work = true; + } + + return 0; +} + +static int +mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf, + struct mlx5_vhca_data_buffer *vhca_buf, + const char __user **buf, + size_t *len, loff_t *pos, + ssize_t *done, bool *has_work) +{ + struct page *page; + size_t copy_len; + u8 *to_buff; + int ret; + + copy_len = min_t(size_t, *len, + sizeof(struct mlx5_vf_migration_header) - vhca_buf->length); + page = mlx5vf_get_migration_page(vhca_buf, 0); + if (!page) + return -EINVAL; + to_buff = kmap_local_page(page); + ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len); + if (ret) { + ret = -EFAULT; + goto end; + } + + *buf += copy_len; + *pos += copy_len; + *done += copy_len; + *len -= copy_len; + vhca_buf->length += copy_len; + if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) { + u64 flags; + + vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff); + if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) { + ret = -ENOMEM; + goto end; + } + + flags = le64_to_cpup((__le64 *)(to_buff + + offsetof(struct mlx5_vf_migration_header, flags))); + if (flags) { + ret = -EOPNOTSUPP; + goto end; + } + + migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE; + migf->max_pos += vhca_buf->length; + *has_work = true; + } +end: + kunmap_local(to_buff); + return ret; +} + static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, size_t len, loff_t *pos) { struct mlx5_vf_migration_file *migf = filp->private_data; + struct mlx5_vhca_data_buffer *vhca_buf = migf->buf; + struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header; loff_t requested_length; + bool has_work = false; ssize_t done = 0; + int ret = 0; if (pos) return -ESPIPE; @@ -282,56 +688,83 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf, check_add_overflow((loff_t)len, *pos, &requested_length)) return -EINVAL; - if (requested_length > MAX_MIGRATION_SIZE) - return -ENOMEM; - + mutex_lock(&migf->mvdev->state_mutex); mutex_lock(&migf->lock); - if (migf->disabled) { - done = -ENODEV; + if (migf->state == MLX5_MIGF_STATE_ERROR) { + ret = -ENODEV; goto out_unlock; } - if (migf->allocated_length < requested_length) { - done = mlx5vf_add_migration_pages( - migf, - DIV_ROUND_UP(requested_length - migf->allocated_length, - PAGE_SIZE)); - if (done) - goto out_unlock; - } - - while (len) { - size_t page_offset; - struct page *page; - size_t page_len; - u8 *to_buff; - int ret; - - page_offset = (*pos) % PAGE_SIZE; - page = mlx5vf_get_migration_page(migf, *pos - page_offset); - if (!page) { - if (done == 0) - done = -EINVAL; - goto out_unlock; + while (len || has_work) { + has_work = false; + switch (migf->load_state) { + case MLX5_VF_LOAD_STATE_READ_HEADER: + ret = mlx5vf_resume_read_header(migf, vhca_buf_header, + &buf, &len, pos, + &done, &has_work); + if (ret) + goto out_unlock; + break; + case MLX5_VF_LOAD_STATE_PREP_IMAGE: + { + u64 size = vhca_buf_header->header_image_size; + + if (vhca_buf->allocated_length < size) { + mlx5vf_free_data_buffer(vhca_buf); + + migf->buf = mlx5vf_alloc_data_buffer(migf, + size, DMA_TO_DEVICE); + if (IS_ERR(migf->buf)) { + ret = PTR_ERR(migf->buf); + migf->buf = NULL; + goto out_unlock; + } + + vhca_buf = migf->buf; + } + + vhca_buf->start_pos = migf->max_pos; + migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE; + break; } + case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER: + ret = mlx5vf_resume_read_image_no_header(vhca_buf, + requested_length, + &buf, &len, pos, &done); + if (ret) + goto out_unlock; + break; + case MLX5_VF_LOAD_STATE_READ_IMAGE: + ret = mlx5vf_resume_read_image(migf, vhca_buf, + vhca_buf_header->header_image_size, + &buf, &len, pos, &done, &has_work); + if (ret) + goto out_unlock; + break; + case MLX5_VF_LOAD_STATE_LOAD_IMAGE: + ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf, vhca_buf); + if (ret) + goto out_unlock; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + + /* prep header buf for next image */ + vhca_buf_header->length = 0; + vhca_buf_header->header_image_size = 0; + /* prep data buf for next image */ + vhca_buf->length = 0; - page_len = min_t(size_t, len, PAGE_SIZE - page_offset); - to_buff = kmap_local_page(page); - ret = copy_from_user(to_buff + page_offset, buf, page_len); - kunmap_local(to_buff); - if (ret) { - done = -EFAULT; - goto out_unlock; + break; + default: + break; } - *pos += page_len; - len -= page_len; - done += page_len; - buf += page_len; - migf->total_length += page_len; } + out_unlock: + if (ret) + migf->state = MLX5_MIGF_STATE_ERROR; mutex_unlock(&migf->lock); - return done; + mlx5vf_state_mutex_unlock(migf->mvdev); + return ret ? ret : done; } static const struct file_operations mlx5vf_resume_fops = { @@ -345,6 +778,8 @@ static struct mlx5_vf_migration_file * mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) { struct mlx5_vf_migration_file *migf; + struct mlx5_vhca_data_buffer *buf; + int ret; migf = kzalloc(sizeof(*migf), GFP_KERNEL); if (!migf) @@ -353,20 +788,59 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev) migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf, O_WRONLY); if (IS_ERR(migf->filp)) { - int err = PTR_ERR(migf->filp); + ret = PTR_ERR(migf->filp); + goto end; + } - kfree(migf); - return ERR_PTR(err); + migf->mvdev = mvdev; + ret = mlx5vf_cmd_alloc_pd(migf); + if (ret) + goto out_free; + + buf = mlx5vf_alloc_data_buffer(migf, 0, DMA_TO_DEVICE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_pd; + } + + migf->buf = buf; + if (MLX5VF_PRE_COPY_SUPP(mvdev)) { + buf = mlx5vf_alloc_data_buffer(migf, + sizeof(struct mlx5_vf_migration_header), DMA_NONE); + if (IS_ERR(buf)) { + ret = PTR_ERR(buf); + goto out_buf; + } + + migf->buf_header = buf; + migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER; + } else { + /* Initial state will be to read the image */ + migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER; } + stream_open(migf->filp->f_inode, migf->filp); mutex_init(&migf->lock); + INIT_LIST_HEAD(&migf->buf_list); + INIT_LIST_HEAD(&migf->avail_list); + spin_lock_init(&migf->list_lock); return migf; +out_buf: + mlx5vf_free_data_buffer(migf->buf); +out_pd: + mlx5vf_cmd_dealloc_pd(migf); +out_free: + fput(migf->filp); +end: + kfree(migf); + return ERR_PTR(ret); } void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) { if (mvdev->resuming_migf) { mlx5vf_disable_fd(mvdev->resuming_migf); + mlx5fv_cmd_clean_migf_resources(mvdev->resuming_migf); fput(mvdev->resuming_migf->filp); mvdev->resuming_migf = NULL; } @@ -374,6 +848,7 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev) mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx); cancel_work_sync(&mvdev->saving_migf->async_data.work); mlx5vf_disable_fd(mvdev->saving_migf); + mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf); fput(mvdev->saving_migf->filp); mvdev->saving_migf = NULL; } @@ -402,7 +877,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return NULL; } - if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) { + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { ret = mlx5vf_cmd_suspend_vhca(mvdev, MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR); if (ret) @@ -410,7 +886,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return NULL; } - if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) { + if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) { ret = mlx5vf_cmd_resume_vhca(mvdev, MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR); if (ret) @@ -421,7 +898,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) { struct mlx5_vf_migration_file *migf; - migf = mlx5vf_pci_save_device_data(mvdev); + migf = mlx5vf_pci_save_device_data(mvdev, false); if (IS_ERR(migf)) return ERR_CAST(migf); get_file(migf->filp); @@ -429,7 +906,10 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, return migf->filp; } - if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) { + if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) || + (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) || + (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && + new == VFIO_DEVICE_STATE_RUNNING_P2P)) { mlx5vf_disable_fds(mvdev); return NULL; } @@ -446,14 +926,39 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev, } if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) { - ret = mlx5vf_cmd_load_vhca_state(mvdev, - mvdev->resuming_migf); - if (ret) - return ERR_PTR(ret); + if (!MLX5VF_PRE_COPY_SUPP(mvdev)) { + ret = mlx5vf_cmd_load_vhca_state(mvdev, + mvdev->resuming_migf, + mvdev->resuming_migf->buf); + if (ret) + return ERR_PTR(ret); + } mlx5vf_disable_fds(mvdev); return NULL; } + if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) || + (cur == VFIO_DEVICE_STATE_RUNNING_P2P && + new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) { + struct mlx5_vf_migration_file *migf; + + migf = mlx5vf_pci_save_device_data(mvdev, true); + if (IS_ERR(migf)) + return ERR_CAST(migf); + get_file(migf->filp); + mvdev->saving_migf = migf; + return migf->filp; + } + + if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) { + ret = mlx5vf_cmd_suspend_vhca(mvdev, + MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER); + if (ret) + return ERR_PTR(ret); + ret = mlx5vf_pci_save_device_inc_data(mvdev); + return ret ? ERR_PTR(ret) : NULL; + } + /* * vfio_mig_get_next_state() does not use arcs other than the above */ @@ -512,6 +1017,23 @@ mlx5vf_pci_set_device_state(struct vfio_device *vdev, return res; } +static int mlx5vf_pci_get_data_size(struct vfio_device *vdev, + unsigned long *stop_copy_length) +{ + struct mlx5vf_pci_core_device *mvdev = container_of( + vdev, struct mlx5vf_pci_core_device, core_device.vdev); + size_t state_size; + int ret; + + mutex_lock(&mvdev->state_mutex); + ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, + &state_size, 0); + if (!ret) + *stop_copy_length = state_size; + mlx5vf_state_mutex_unlock(mvdev); + return ret; +} + static int mlx5vf_pci_get_device_state(struct vfio_device *vdev, enum vfio_device_mig_state *curr_state) { @@ -577,6 +1099,7 @@ static void mlx5vf_pci_close_device(struct vfio_device *core_vdev) static const struct vfio_migration_ops mlx5vf_pci_mig_ops = { .migration_set_state = mlx5vf_pci_set_device_state, .migration_get_state = mlx5vf_pci_get_device_state, + .migration_get_data_size = mlx5vf_pci_get_data_size, }; static const struct vfio_log_ops mlx5vf_pci_log_ops = { @@ -623,6 +1146,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int mlx5vf_pci_probe(struct pci_dev *pdev, @@ -676,18 +1202,7 @@ static struct pci_driver mlx5vf_pci_driver = { .driver_managed_dma = true, }; -static void __exit mlx5vf_pci_cleanup(void) -{ - pci_unregister_driver(&mlx5vf_pci_driver); -} - -static int __init mlx5vf_pci_init(void) -{ - return pci_register_driver(&mlx5vf_pci_driver); -} - -module_init(mlx5vf_pci_init); -module_exit(mlx5vf_pci_cleanup); +module_pci_driver(mlx5vf_pci_driver); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Max Gurtovoy "); diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 1d4919edfbde488918327ca894529c291310f7cf..29091ee2e9849bafc044a70559b7db1f2f260116 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = { .mmap = vfio_pci_core_mmap, .request = vfio_pci_core_request, .match = vfio_pci_core_match, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c index e030c2120183ef5c6bd4019ad50e5e6ac4f40a6c..26a541cc64d114a22e4f9688c71f3a163683ebdc 100644 --- a/drivers/vfio/pci/vfio_pci_core.c +++ b/drivers/vfio/pci/vfio_pci_core.c @@ -27,6 +27,9 @@ #include #include #include +#if IS_ENABLED(CONFIG_EEH) +#include +#endif #include "vfio_pci_priv.h" @@ -686,7 +689,9 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev) vdev->sriov_pf_core_dev->vf_token->users--; mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock); } - vfio_spapr_pci_eeh_release(vdev->pdev); +#if IS_ENABLED(CONFIG_EEH) + eeh_dev_release(vdev->pdev); +#endif vfio_pci_core_disable(vdev); mutex_lock(&vdev->igate); @@ -705,7 +710,9 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device); void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev) { vfio_pci_probe_mmaps(vdev); - vfio_spapr_pci_eeh_open(vdev->pdev); +#if IS_ENABLED(CONFIG_EEH) + eeh_dev_open(vdev->pdev); +#endif if (vdev->sriov_pf_core_dev) { mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock); @@ -2109,7 +2116,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev) mutex_destroy(&vdev->vma_lock); kfree(vdev->region); kfree(vdev->pm_save); - vfio_free_device(core_vdev); } EXPORT_SYMBOL_GPL(vfio_pci_core_release_dev); @@ -2128,7 +2134,8 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev) if (vdev->vdev.mig_ops) { if (!(vdev->vdev.mig_ops->migration_get_state && - vdev->vdev.mig_ops->migration_set_state) || + vdev->vdev.mig_ops->migration_set_state && + vdev->vdev.mig_ops->migration_get_data_size) || !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY)) return -EINVAL; } diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c index eaea63e5294c58d9874d5526d3921d0bfc88874d..83fe5401559585d241146258b9630d1670ed8600 100644 --- a/drivers/vfio/platform/vfio_amba.c +++ b/drivers/vfio/platform/vfio_amba.c @@ -95,7 +95,6 @@ static void vfio_amba_release_dev(struct vfio_device *core_vdev) vfio_platform_release_common(vdev); kfree(vdev->name); - vfio_free_device(core_vdev); } static void vfio_amba_remove(struct amba_device *adev) @@ -117,6 +116,9 @@ static const struct vfio_device_ops vfio_amba_ops = { .read = vfio_platform_read, .write = vfio_platform_write, .mmap = vfio_platform_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static const struct amba_id pl330_ids[] = { diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c index 82cedcebfd9022ca7e08fe920b01ebc5e3cfa62e..22a1efca32a8a86ae24ae683d2035d7cece8e016 100644 --- a/drivers/vfio/platform/vfio_platform.c +++ b/drivers/vfio/platform/vfio_platform.c @@ -83,7 +83,6 @@ static void vfio_platform_release_dev(struct vfio_device *core_vdev) container_of(core_vdev, struct vfio_platform_device, vdev); vfio_platform_release_common(vdev); - vfio_free_device(core_vdev); } static int vfio_platform_remove(struct platform_device *pdev) @@ -106,6 +105,9 @@ static const struct vfio_device_ops vfio_platform_ops = { .read = vfio_platform_read, .write = vfio_platform_write, .mmap = vfio_platform_mmap, + .bind_iommufd = vfio_iommufd_physical_bind, + .unbind_iommufd = vfio_iommufd_physical_unbind, + .attach_ioas = vfio_iommufd_physical_attach_ioas, }; static struct platform_driver vfio_platform_driver = { diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 55dc4f43c31e3eccbeada369c6de76ed89e5fc61..1a0a238ffa3543d7999c42b4d219ccfe7897fdb7 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -72,12 +72,11 @@ static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev, const char **extra_dbg) { #ifdef CONFIG_ACPI - struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; struct device *dev = vdev->device; acpi_handle handle = ACPI_HANDLE(dev); acpi_status acpi_ret; - acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, &buffer); + acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, NULL); if (ACPI_FAILURE(acpi_ret)) { if (extra_dbg) *extra_dbg = acpi_format_exception(acpi_ret); diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h index bcad54bbab08c4b8497e5aead5d5576fc45a01e4..f8219a438bfbf58d3f7c08e907a3dc4f660dfed5 100644 --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@ -6,14 +6,25 @@ #ifndef __VFIO_VFIO_H__ #define __VFIO_VFIO_H__ +#include #include #include #include +struct iommufd_ctx; struct iommu_group; struct vfio_device; struct vfio_container; +void vfio_device_put_registration(struct vfio_device *device); +bool vfio_device_try_get_registration(struct vfio_device *device); +int vfio_device_open(struct vfio_device *device, + struct iommufd_ctx *iommufd, struct kvm *kvm); +void vfio_device_close(struct vfio_device *device, + struct iommufd_ctx *iommufd); + +extern const struct file_operations vfio_device_fops; + enum vfio_group_type { /* * Physical device with IOMMU backing. @@ -54,14 +65,30 @@ struct vfio_group { struct list_head device_list; struct mutex device_lock; struct list_head vfio_next; +#if IS_ENABLED(CONFIG_VFIO_CONTAINER) struct list_head container_next; +#endif enum vfio_group_type type; struct mutex group_lock; struct kvm *kvm; struct file *opened_file; struct blocking_notifier_head notifier; + struct iommufd_ctx *iommufd; }; +int vfio_device_set_group(struct vfio_device *device, + enum vfio_group_type type); +void vfio_device_remove_group(struct vfio_device *device); +void vfio_device_group_register(struct vfio_device *device); +void vfio_device_group_unregister(struct vfio_device *device); +int vfio_device_group_use_iommu(struct vfio_device *device); +void vfio_device_group_unuse_iommu(struct vfio_device *device); +void vfio_device_group_close(struct vfio_device *device); +bool vfio_device_has_container(struct vfio_device *device); +int __init vfio_group_init(void); +void vfio_group_cleanup(void); + +#if IS_ENABLED(CONFIG_VFIO_CONTAINER) /* events for the backend driver notify callback */ enum vfio_iommu_notify_type { VFIO_IOMMU_CONTAINER_CLOSE = 0, @@ -109,20 +136,114 @@ struct vfio_iommu_driver { int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops); -bool vfio_assert_device_open(struct vfio_device *device); - struct vfio_container *vfio_container_from_file(struct file *filep); -int vfio_device_assign_container(struct vfio_device *device); -void vfio_device_unassign_container(struct vfio_device *device); +int vfio_group_use_container(struct vfio_group *group); +void vfio_group_unuse_container(struct vfio_group *group); int vfio_container_attach_group(struct vfio_container *container, struct vfio_group *group); void vfio_group_detach_container(struct vfio_group *group); void vfio_device_container_register(struct vfio_device *device); void vfio_device_container_unregister(struct vfio_device *device); -long vfio_container_ioctl_check_extension(struct vfio_container *container, - unsigned long arg); +int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + int prot, struct page **pages); +void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage); +int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write); + int __init vfio_container_init(void); void vfio_container_cleanup(void); +#else +static inline struct vfio_container * +vfio_container_from_file(struct file *filep) +{ + return NULL; +} + +static inline int vfio_group_use_container(struct vfio_group *group) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_group_unuse_container(struct vfio_group *group) +{ +} + +static inline int vfio_container_attach_group(struct vfio_container *container, + struct vfio_group *group) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_group_detach_container(struct vfio_group *group) +{ +} + +static inline void vfio_device_container_register(struct vfio_device *device) +{ +} + +static inline void vfio_device_container_unregister(struct vfio_device *device) +{ +} + +static inline int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + int prot, struct page **pages) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage) +{ +} + +static inline int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write) +{ + return -EOPNOTSUPP; +} + +static inline int vfio_container_init(void) +{ + return 0; +} +static inline void vfio_container_cleanup(void) +{ +} +#endif + +#if IS_ENABLED(CONFIG_IOMMUFD) +int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx); +void vfio_iommufd_unbind(struct vfio_device *device); +#else +static inline int vfio_iommufd_bind(struct vfio_device *device, + struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_iommufd_unbind(struct vfio_device *device) +{ +} +#endif + +#if IS_ENABLED(CONFIG_VFIO_VIRQFD) +int __init vfio_virqfd_init(void); +void vfio_virqfd_exit(void); +#else +static inline int __init vfio_virqfd_init(void) +{ + return 0; +} +static inline void vfio_virqfd_exit(void) +{ +} +#endif #ifdef CONFIG_VFIO_NOIOMMU extern bool vfio_noiommu __read_mostly; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 169f07ac162d9c9520eefa47674591a820ca97eb..60a50ce8701e5c6292e46f398563f2ccb9aec866 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -4,6 +4,7 @@ * * Copyright (C) 2013 IBM Corp. All rights reserved. * Author: Alexey Kardashevskiy + * Copyright Gavin Shan, IBM Corporation 2014. * * Derived from original vfio_iommu_type1.c: * Copyright (C) 2012 Red Hat, Inc. All rights reserved. @@ -773,6 +774,57 @@ static long tce_iommu_create_default_window(struct tce_container *container) return ret; } +static long vfio_spapr_ioctl_eeh_pe_op(struct iommu_group *group, + unsigned long arg) +{ + struct eeh_pe *pe; + struct vfio_eeh_pe_op op; + unsigned long minsz; + + pe = eeh_iommu_group_to_pe(group); + if (!pe) + return -ENODEV; + + minsz = offsetofend(struct vfio_eeh_pe_op, op); + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + if (op.argsz < minsz || op.flags) + return -EINVAL; + + switch (op.op) { + case VFIO_EEH_PE_DISABLE: + return eeh_pe_set_option(pe, EEH_OPT_DISABLE); + case VFIO_EEH_PE_ENABLE: + return eeh_pe_set_option(pe, EEH_OPT_ENABLE); + case VFIO_EEH_PE_UNFREEZE_IO: + return eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); + case VFIO_EEH_PE_UNFREEZE_DMA: + return eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); + case VFIO_EEH_PE_GET_STATE: + return eeh_pe_get_state(pe); + break; + case VFIO_EEH_PE_RESET_DEACTIVATE: + return eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true); + case VFIO_EEH_PE_RESET_HOT: + return eeh_pe_reset(pe, EEH_RESET_HOT, true); + case VFIO_EEH_PE_RESET_FUNDAMENTAL: + return eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true); + case VFIO_EEH_PE_CONFIGURE: + return eeh_pe_configure(pe); + case VFIO_EEH_PE_INJECT_ERR: + minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); + if (op.argsz < minsz) + return -EINVAL; + if (copy_from_user(&op, (void __user *)arg, minsz)) + return -EFAULT; + + return eeh_pe_inject_err(pe, op.err.type, op.err.func, + op.err.addr, op.err.mask); + default: + return -EINVAL; + } +} + static long tce_iommu_ioctl(void *iommu_data, unsigned int cmd, unsigned long arg) { @@ -785,14 +837,12 @@ static long tce_iommu_ioctl(void *iommu_data, switch (arg) { case VFIO_SPAPR_TCE_IOMMU: case VFIO_SPAPR_TCE_v2_IOMMU: - ret = 1; - break; + return 1; + case VFIO_EEH: + return eeh_enabled(); default: - ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg); - break; + return 0; } - - return (ret < 0) ? 0 : ret; } /* @@ -1046,8 +1096,7 @@ static long tce_iommu_ioctl(void *iommu_data, ret = 0; list_for_each_entry(tcegrp, &container->group_list, next) { - ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp, - cmd, arg); + ret = vfio_spapr_ioctl_eeh_pe_op(tcegrp->grp, arg); if (ret) return ret; } diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c index 6e8804fe00953d4fa7beaf1b503751302cefcafc..5177bb061b17b51b9e8e9030d9bdf13c0b6a2b4a 100644 --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@ -13,8 +13,6 @@ #include #include #include -#include -#include #include #include #include @@ -35,6 +33,7 @@ #include #include #include +#include #include "vfio.h" #define DRIVER_VERSION "0.3" @@ -42,17 +41,11 @@ #define DRIVER_DESC "VFIO - User Level meta-driver" static struct vfio { - struct class *class; - struct list_head group_list; - struct mutex group_lock; /* locks group_list */ - struct ida group_ida; - dev_t group_devt; struct class *device_class; struct ida device_ida; } vfio; static DEFINE_XARRAY(vfio_device_set_xa); -static const struct file_operations vfio_group_fops; int vfio_assign_device_set(struct vfio_device *device, void *set_id) { @@ -138,208 +131,21 @@ unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set) } EXPORT_SYMBOL_GPL(vfio_device_set_open_count); -/* - * Group objects - create, release, get, put, search - */ -static struct vfio_group * -__vfio_group_get_from_iommu(struct iommu_group *iommu_group) -{ - struct vfio_group *group; - - /* - * group->iommu_group from the vfio.group_list cannot be NULL - * under the vfio.group_lock. - */ - list_for_each_entry(group, &vfio.group_list, vfio_next) { - if (group->iommu_group == iommu_group) { - refcount_inc(&group->drivers); - return group; - } - } - return NULL; -} - -static struct vfio_group * -vfio_group_get_from_iommu(struct iommu_group *iommu_group) -{ - struct vfio_group *group; - - mutex_lock(&vfio.group_lock); - group = __vfio_group_get_from_iommu(iommu_group); - mutex_unlock(&vfio.group_lock); - return group; -} - -static void vfio_group_release(struct device *dev) -{ - struct vfio_group *group = container_of(dev, struct vfio_group, dev); - - mutex_destroy(&group->device_lock); - mutex_destroy(&group->group_lock); - WARN_ON(group->iommu_group); - ida_free(&vfio.group_ida, MINOR(group->dev.devt)); - kfree(group); -} - -static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group, - enum vfio_group_type type) -{ - struct vfio_group *group; - int minor; - - group = kzalloc(sizeof(*group), GFP_KERNEL); - if (!group) - return ERR_PTR(-ENOMEM); - - minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL); - if (minor < 0) { - kfree(group); - return ERR_PTR(minor); - } - - device_initialize(&group->dev); - group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor); - group->dev.class = vfio.class; - group->dev.release = vfio_group_release; - cdev_init(&group->cdev, &vfio_group_fops); - group->cdev.owner = THIS_MODULE; - - refcount_set(&group->drivers, 1); - mutex_init(&group->group_lock); - INIT_LIST_HEAD(&group->device_list); - mutex_init(&group->device_lock); - group->iommu_group = iommu_group; - /* put in vfio_group_release() */ - iommu_group_ref_get(iommu_group); - group->type = type; - BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier); - - return group; -} - -static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group, - enum vfio_group_type type) -{ - struct vfio_group *group; - struct vfio_group *ret; - int err; - - group = vfio_group_alloc(iommu_group, type); - if (IS_ERR(group)) - return group; - - err = dev_set_name(&group->dev, "%s%d", - group->type == VFIO_NO_IOMMU ? "noiommu-" : "", - iommu_group_id(iommu_group)); - if (err) { - ret = ERR_PTR(err); - goto err_put; - } - - mutex_lock(&vfio.group_lock); - - /* Did we race creating this group? */ - ret = __vfio_group_get_from_iommu(iommu_group); - if (ret) - goto err_unlock; - - err = cdev_device_add(&group->cdev, &group->dev); - if (err) { - ret = ERR_PTR(err); - goto err_unlock; - } - - list_add(&group->vfio_next, &vfio.group_list); - - mutex_unlock(&vfio.group_lock); - return group; - -err_unlock: - mutex_unlock(&vfio.group_lock); -err_put: - put_device(&group->dev); - return ret; -} - -static void vfio_device_remove_group(struct vfio_device *device) -{ - struct vfio_group *group = device->group; - struct iommu_group *iommu_group; - - if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU) - iommu_group_remove_device(device->dev); - - /* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */ - if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock)) - return; - list_del(&group->vfio_next); - - /* - * We could concurrently probe another driver in the group that might - * race vfio_device_remove_group() with vfio_get_group(), so we have to - * ensure that the sysfs is all cleaned up under lock otherwise the - * cdev_device_add() will fail due to the name aready existing. - */ - cdev_device_del(&group->cdev, &group->dev); - - mutex_lock(&group->group_lock); - /* - * These data structures all have paired operations that can only be - * undone when the caller holds a live reference on the device. Since - * all pairs must be undone these WARN_ON's indicate some caller did not - * properly hold the group reference. - */ - WARN_ON(!list_empty(&group->device_list)); - WARN_ON(group->notifier.head); - - /* - * Revoke all users of group->iommu_group. At this point we know there - * are no devices active because we are unplugging the last one. Setting - * iommu_group to NULL blocks all new users. - */ - if (group->container) - vfio_group_detach_container(group); - iommu_group = group->iommu_group; - group->iommu_group = NULL; - mutex_unlock(&group->group_lock); - mutex_unlock(&vfio.group_lock); - - iommu_group_put(iommu_group); - put_device(&group->dev); -} - /* * Device objects - create, release, get, put, search */ /* Device reference always implies a group reference */ -static void vfio_device_put_registration(struct vfio_device *device) +void vfio_device_put_registration(struct vfio_device *device) { if (refcount_dec_and_test(&device->refcount)) complete(&device->comp); } -static bool vfio_device_try_get_registration(struct vfio_device *device) +bool vfio_device_try_get_registration(struct vfio_device *device) { return refcount_inc_not_zero(&device->refcount); } -static struct vfio_device *vfio_group_get_device(struct vfio_group *group, - struct device *dev) -{ - struct vfio_device *device; - - mutex_lock(&group->device_lock); - list_for_each_entry(device, &group->device_list, group_next) { - if (device->dev == dev && - vfio_device_try_get_registration(device)) { - mutex_unlock(&group->device_lock); - return device; - } - } - mutex_unlock(&group->device_lock); - return NULL; -} - /* * VFIO driver API */ @@ -352,15 +158,15 @@ static void vfio_device_release(struct device *dev) vfio_release_device_set(device); ida_free(&vfio.device_ida, device->index); - /* - * kvfree() cannot be done here due to a life cycle mess in - * vfio-ccw. Before the ccw part is fixed all drivers are - * required to support @release and call vfio_free_device() - * from there. - */ - device->ops->release(device); + if (device->ops->release) + device->ops->release(device); + + kvfree(device); } +static int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops); + /* * Allocate and initialize vfio_device so it can be registered to vfio * core. @@ -399,11 +205,9 @@ EXPORT_SYMBOL_GPL(_vfio_alloc_device); /* * Initialize a vfio_device so it can be registered to vfio core. - * - * Only vfio-ccw driver should call this interface. */ -int vfio_init_device(struct vfio_device *device, struct device *dev, - const struct vfio_device_ops *ops) +static int vfio_init_device(struct vfio_device *device, struct device *dev, + const struct vfio_device_ops *ops) { int ret; @@ -435,107 +239,16 @@ out_uninit: ida_free(&vfio.device_ida, device->index); return ret; } -EXPORT_SYMBOL_GPL(vfio_init_device); - -/* - * The helper called by driver @release callback to free the device - * structure. Drivers which don't have private data to clean can - * simply use this helper as its @release. - */ -void vfio_free_device(struct vfio_device *device) -{ - kvfree(device); -} -EXPORT_SYMBOL_GPL(vfio_free_device); - -static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, - enum vfio_group_type type) -{ - struct iommu_group *iommu_group; - struct vfio_group *group; - int ret; - - iommu_group = iommu_group_alloc(); - if (IS_ERR(iommu_group)) - return ERR_CAST(iommu_group); - - ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); - if (ret) - goto out_put_group; - ret = iommu_group_add_device(iommu_group, dev); - if (ret) - goto out_put_group; - - group = vfio_create_group(iommu_group, type); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto out_remove_device; - } - iommu_group_put(iommu_group); - return group; - -out_remove_device: - iommu_group_remove_device(dev); -out_put_group: - iommu_group_put(iommu_group); - return ERR_PTR(ret); -} - -static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) -{ - struct iommu_group *iommu_group; - struct vfio_group *group; - - iommu_group = iommu_group_get(dev); - if (!iommu_group && vfio_noiommu) { - /* - * With noiommu enabled, create an IOMMU group for devices that - * don't already have one, implying no IOMMU hardware/driver - * exists. Taint the kernel because we're about to give a DMA - * capable device to a user without IOMMU protection. - */ - group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); - if (!IS_ERR(group)) { - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); - } - return group; - } - - if (!iommu_group) - return ERR_PTR(-EINVAL); - - /* - * VFIO always sets IOMMU_CACHE because we offer no way for userspace to - * restore cache coherency. It has to be checked here because it is only - * valid for cases where we are using iommu groups. - */ - if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { - iommu_group_put(iommu_group); - return ERR_PTR(-EINVAL); - } - - group = vfio_group_get_from_iommu(iommu_group); - if (!group) - group = vfio_create_group(iommu_group, VFIO_IOMMU); - - /* The vfio_group holds a reference to the iommu_group */ - iommu_group_put(iommu_group); - return group; -} static int __vfio_register_dev(struct vfio_device *device, - struct vfio_group *group) + enum vfio_group_type type) { - struct vfio_device *existing_device; int ret; - /* - * In all cases group is the output of one of the group allocation - * functions and we have group->drivers incremented for us. - */ - if (IS_ERR(group)) - return PTR_ERR(group); + if (WARN_ON(device->ops->bind_iommufd && + (!device->ops->unbind_iommufd || + !device->ops->attach_ioas))) + return -EINVAL; /* * If the driver doesn't specify a set then the device is added to a @@ -544,25 +257,13 @@ static int __vfio_register_dev(struct vfio_device *device, if (!device->dev_set) vfio_assign_device_set(device, device); - existing_device = vfio_group_get_device(group, device->dev); - if (existing_device) { - /* - * group->iommu_group is non-NULL because we hold the drivers - * refcount. - */ - dev_WARN(device->dev, "Device already exists on group %d\n", - iommu_group_id(group->iommu_group)); - vfio_device_put_registration(existing_device); - ret = -EBUSY; - goto err_out; - } - - /* Our reference on group is moved to the device */ - device->group = group; - ret = dev_set_name(&device->device, "vfio%d", device->index); if (ret) - goto err_out; + return ret; + + ret = vfio_device_set_group(device, type); + if (ret) + return ret; ret = device_add(&device->device); if (ret) @@ -571,9 +272,7 @@ static int __vfio_register_dev(struct vfio_device *device, /* Refcounting can't start until the driver calls register */ refcount_set(&device->refcount, 1); - mutex_lock(&group->device_lock); - list_add(&device->group_next, &group->device_list); - mutex_unlock(&group->device_lock); + vfio_device_group_register(device); return 0; err_out: @@ -583,8 +282,7 @@ err_out: int vfio_register_group_dev(struct vfio_device *device) { - return __vfio_register_dev(device, - vfio_group_find_or_alloc(device->dev)); + return __vfio_register_dev(device, VFIO_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_group_dev); @@ -594,46 +292,15 @@ EXPORT_SYMBOL_GPL(vfio_register_group_dev); */ int vfio_register_emulated_iommu_dev(struct vfio_device *device) { - return __vfio_register_dev(device, - vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU)); + return __vfio_register_dev(device, VFIO_EMULATED_IOMMU); } EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev); -static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group, - char *buf) -{ - struct vfio_device *it, *device = ERR_PTR(-ENODEV); - - mutex_lock(&group->device_lock); - list_for_each_entry(it, &group->device_list, group_next) { - int ret; - - if (it->ops->match) { - ret = it->ops->match(it, buf); - if (ret < 0) { - device = ERR_PTR(ret); - break; - } - } else { - ret = !strcmp(dev_name(it->dev), buf); - } - - if (ret && vfio_device_try_get_registration(it)) { - device = it; - break; - } - } - mutex_unlock(&group->device_lock); - - return device; -} - /* * Decrement the device reference count and wait for the device to be * removed. Open file descriptors for the device... */ void vfio_unregister_group_dev(struct vfio_device *device) { - struct vfio_group *group = device->group; unsigned int i = 0; bool interrupted = false; long rc; @@ -661,333 +328,101 @@ void vfio_unregister_group_dev(struct vfio_device *device) } } - mutex_lock(&group->device_lock); - list_del(&device->group_next); - mutex_unlock(&group->device_lock); + vfio_device_group_unregister(device); /* Balances device_add in register path */ device_del(&device->device); + /* Balances vfio_device_set_group in register path */ vfio_device_remove_group(device); } EXPORT_SYMBOL_GPL(vfio_unregister_group_dev); -/* - * VFIO Group fd, /dev/vfio/$GROUP - */ -/* - * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or - * if there was no container to unset. Since the ioctl is called on - * the group, we know that still exists, therefore the only valid - * transition here is 1->0. - */ -static int vfio_group_ioctl_unset_container(struct vfio_group *group) +/* true if the vfio_device has open_device() called but not close_device() */ +static bool vfio_assert_device_open(struct vfio_device *device) { - int ret = 0; - - mutex_lock(&group->group_lock); - if (!group->container) { - ret = -EINVAL; - goto out_unlock; - } - if (group->container_users != 1) { - ret = -EBUSY; - goto out_unlock; - } - vfio_group_detach_container(group); - -out_unlock: - mutex_unlock(&group->group_lock); - return ret; + return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); } -static int vfio_group_ioctl_set_container(struct vfio_group *group, - int __user *arg) +static int vfio_device_first_open(struct vfio_device *device, + struct iommufd_ctx *iommufd, struct kvm *kvm) { - struct vfio_container *container; - struct fd f; int ret; - int fd; - if (get_user(fd, arg)) - return -EFAULT; + lockdep_assert_held(&device->dev_set->lock); - f = fdget(fd); - if (!f.file) - return -EBADF; + if (!try_module_get(device->dev->driver->owner)) + return -ENODEV; - mutex_lock(&group->group_lock); - if (group->container || WARN_ON(group->container_users)) { - ret = -EINVAL; - goto out_unlock; - } - if (!group->iommu_group) { - ret = -ENODEV; - goto out_unlock; - } + if (iommufd) + ret = vfio_iommufd_bind(device, iommufd); + else + ret = vfio_device_group_use_iommu(device); + if (ret) + goto err_module_put; - container = vfio_container_from_file(f.file); - ret = -EINVAL; - if (container) { - ret = vfio_container_attach_group(container, group); - goto out_unlock; + device->kvm = kvm; + if (device->ops->open_device) { + ret = device->ops->open_device(device); + if (ret) + goto err_unuse_iommu; } + return 0; -out_unlock: - mutex_unlock(&group->group_lock); - fdput(f); +err_unuse_iommu: + device->kvm = NULL; + if (iommufd) + vfio_iommufd_unbind(device); + else + vfio_device_group_unuse_iommu(device); +err_module_put: + module_put(device->dev->driver->owner); return ret; } -static const struct file_operations vfio_device_fops; - -/* true if the vfio_device has open_device() called but not close_device() */ -bool vfio_assert_device_open(struct vfio_device *device) +static void vfio_device_last_close(struct vfio_device *device, + struct iommufd_ctx *iommufd) { - return !WARN_ON_ONCE(!READ_ONCE(device->open_count)); + lockdep_assert_held(&device->dev_set->lock); + + if (device->ops->close_device) + device->ops->close_device(device); + device->kvm = NULL; + if (iommufd) + vfio_iommufd_unbind(device); + else + vfio_device_group_unuse_iommu(device); + module_put(device->dev->driver->owner); } -static struct file *vfio_device_open(struct vfio_device *device) +int vfio_device_open(struct vfio_device *device, + struct iommufd_ctx *iommufd, struct kvm *kvm) { - struct file *filep; - int ret; - - mutex_lock(&device->group->group_lock); - ret = vfio_device_assign_container(device); - mutex_unlock(&device->group->group_lock); - if (ret) - return ERR_PTR(ret); - - if (!try_module_get(device->dev->driver->owner)) { - ret = -ENODEV; - goto err_unassign_container; - } + int ret = 0; mutex_lock(&device->dev_set->lock); device->open_count++; if (device->open_count == 1) { - /* - * Here we pass the KVM pointer with the group under the read - * lock. If the device driver will use it, it must obtain a - * reference and release it during close_device. - */ - mutex_lock(&device->group->group_lock); - device->kvm = device->group->kvm; - - if (device->ops->open_device) { - ret = device->ops->open_device(device); - if (ret) - goto err_undo_count; - } - vfio_device_container_register(device); - mutex_unlock(&device->group->group_lock); - } - mutex_unlock(&device->dev_set->lock); - - /* - * We can't use anon_inode_getfd() because we need to modify - * the f_mode flags directly to allow more than just ioctls - */ - filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, - device, O_RDWR); - if (IS_ERR(filep)) { - ret = PTR_ERR(filep); - goto err_close_device; - } - - /* - * TODO: add an anon_inode interface to do this. - * Appears to be missing by lack of need rather than - * explicitly prevented. Now there's need. - */ - filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE); - - if (device->group->type == VFIO_NO_IOMMU) - dev_warn(device->dev, "vfio-noiommu device opened by user " - "(%s:%d)\n", current->comm, task_pid_nr(current)); - /* - * On success the ref of device is moved to the file and - * put in vfio_device_fops_release() - */ - return filep; - -err_close_device: - mutex_lock(&device->dev_set->lock); - mutex_lock(&device->group->group_lock); - if (device->open_count == 1) { - if (device->ops->close_device) - device->ops->close_device(device); - - vfio_device_container_unregister(device); + ret = vfio_device_first_open(device, iommufd, kvm); + if (ret) + device->open_count--; } -err_undo_count: - mutex_unlock(&device->group->group_lock); - device->open_count--; - if (device->open_count == 0 && device->kvm) - device->kvm = NULL; mutex_unlock(&device->dev_set->lock); - module_put(device->dev->driver->owner); -err_unassign_container: - vfio_device_unassign_container(device); - return ERR_PTR(ret); -} - -static int vfio_group_ioctl_get_device_fd(struct vfio_group *group, - char __user *arg) -{ - struct vfio_device *device; - struct file *filep; - char *buf; - int fdno; - int ret; - - buf = strndup_user(arg, PAGE_SIZE); - if (IS_ERR(buf)) - return PTR_ERR(buf); - device = vfio_device_get_from_name(group, buf); - kfree(buf); - if (IS_ERR(device)) - return PTR_ERR(device); - - fdno = get_unused_fd_flags(O_CLOEXEC); - if (fdno < 0) { - ret = fdno; - goto err_put_device; - } - - filep = vfio_device_open(device); - if (IS_ERR(filep)) { - ret = PTR_ERR(filep); - goto err_put_fdno; - } - - fd_install(fdno, filep); - return fdno; - -err_put_fdno: - put_unused_fd(fdno); -err_put_device: - vfio_device_put_registration(device); - return ret; -} - -static int vfio_group_ioctl_get_status(struct vfio_group *group, - struct vfio_group_status __user *arg) -{ - unsigned long minsz = offsetofend(struct vfio_group_status, flags); - struct vfio_group_status status; - - if (copy_from_user(&status, arg, minsz)) - return -EFAULT; - - if (status.argsz < minsz) - return -EINVAL; - - status.flags = 0; - - mutex_lock(&group->group_lock); - if (!group->iommu_group) { - mutex_unlock(&group->group_lock); - return -ENODEV; - } - - if (group->container) - status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET | - VFIO_GROUP_FLAGS_VIABLE; - else if (!iommu_group_dma_owner_claimed(group->iommu_group)) - status.flags |= VFIO_GROUP_FLAGS_VIABLE; - mutex_unlock(&group->group_lock); - - if (copy_to_user(arg, &status, minsz)) - return -EFAULT; - return 0; -} - -static long vfio_group_fops_unl_ioctl(struct file *filep, - unsigned int cmd, unsigned long arg) -{ - struct vfio_group *group = filep->private_data; - void __user *uarg = (void __user *)arg; - - switch (cmd) { - case VFIO_GROUP_GET_DEVICE_FD: - return vfio_group_ioctl_get_device_fd(group, uarg); - case VFIO_GROUP_GET_STATUS: - return vfio_group_ioctl_get_status(group, uarg); - case VFIO_GROUP_SET_CONTAINER: - return vfio_group_ioctl_set_container(group, uarg); - case VFIO_GROUP_UNSET_CONTAINER: - return vfio_group_ioctl_unset_container(group); - default: - return -ENOTTY; - } -} - -static int vfio_group_fops_open(struct inode *inode, struct file *filep) -{ - struct vfio_group *group = - container_of(inode->i_cdev, struct vfio_group, cdev); - int ret; - - mutex_lock(&group->group_lock); - - /* - * drivers can be zero if this races with vfio_device_remove_group(), it - * will be stable at 0 under the group rwsem - */ - if (refcount_read(&group->drivers) == 0) { - ret = -ENODEV; - goto out_unlock; - } - - if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) { - ret = -EPERM; - goto out_unlock; - } - - /* - * Do we need multiple instances of the group open? Seems not. - */ - if (group->opened_file) { - ret = -EBUSY; - goto out_unlock; - } - group->opened_file = filep; - filep->private_data = group; - ret = 0; -out_unlock: - mutex_unlock(&group->group_lock); return ret; } -static int vfio_group_fops_release(struct inode *inode, struct file *filep) +void vfio_device_close(struct vfio_device *device, + struct iommufd_ctx *iommufd) { - struct vfio_group *group = filep->private_data; - - filep->private_data = NULL; - - mutex_lock(&group->group_lock); - /* - * Device FDs hold a group file reference, therefore the group release - * is only called when there are no open devices. - */ - WARN_ON(group->notifier.head); - if (group->container) - vfio_group_detach_container(group); - group->opened_file = NULL; - mutex_unlock(&group->group_lock); - return 0; + mutex_lock(&device->dev_set->lock); + vfio_assert_device_open(device); + if (device->open_count == 1) + vfio_device_last_close(device, iommufd); + device->open_count--; + mutex_unlock(&device->dev_set->lock); } -static const struct file_operations vfio_group_fops = { - .owner = THIS_MODULE, - .unlocked_ioctl = vfio_group_fops_unl_ioctl, - .compat_ioctl = compat_ptr_ioctl, - .open = vfio_group_fops_open, - .release = vfio_group_fops_release, -}; - /* * Wrapper around pm_runtime_resume_and_get(). * Return error code on failure or 0 on success. @@ -1028,24 +463,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) { struct vfio_device *device = filep->private_data; - mutex_lock(&device->dev_set->lock); - vfio_assert_device_open(device); - mutex_lock(&device->group->group_lock); - if (device->open_count == 1) { - if (device->ops->close_device) - device->ops->close_device(device); - - vfio_device_container_unregister(device); - } - mutex_unlock(&device->group->group_lock); - device->open_count--; - if (device->open_count == 0) - device->kvm = NULL; - mutex_unlock(&device->dev_set->lock); - - module_put(device->dev->driver->owner); - - vfio_device_unassign_container(device); + vfio_device_group_close(device); vfio_device_put_registration(device); @@ -1072,7 +490,7 @@ int vfio_mig_get_next_state(struct vfio_device *device, enum vfio_device_mig_state new_fsm, enum vfio_device_mig_state *next_fsm) { - enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 }; + enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 }; /* * The coding in this table requires the driver to implement the * following FSM arcs: @@ -1087,30 +505,65 @@ int vfio_mig_get_next_state(struct vfio_device *device, * RUNNING_P2P -> RUNNING * RUNNING_P2P -> STOP * STOP -> RUNNING_P2P - * Without P2P the driver must implement: + * + * If precopy is supported then the driver must support these additional + * FSM arcs: + * RUNNING -> PRE_COPY + * PRE_COPY -> RUNNING + * PRE_COPY -> STOP_COPY + * However, if precopy and P2P are supported together then the driver + * must support these additional arcs beyond the P2P arcs above: + * PRE_COPY -> RUNNING + * PRE_COPY -> PRE_COPY_P2P + * PRE_COPY_P2P -> PRE_COPY + * PRE_COPY_P2P -> RUNNING_P2P + * PRE_COPY_P2P -> STOP_COPY + * RUNNING -> PRE_COPY + * RUNNING_P2P -> PRE_COPY_P2P + * + * Without P2P and precopy the driver must implement: * RUNNING -> STOP * STOP -> RUNNING * * The coding will step through multiple states for some combination * transitions; if all optional features are supported, this means the * following ones: + * PRE_COPY -> PRE_COPY_P2P -> STOP_COPY + * PRE_COPY -> RUNNING -> RUNNING_P2P + * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP + * PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING + * PRE_COPY_P2P -> RUNNING_P2P -> RUNNING + * PRE_COPY_P2P -> RUNNING_P2P -> STOP + * PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING * RESUMING -> STOP -> RUNNING_P2P + * RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P * RESUMING -> STOP -> RUNNING_P2P -> RUNNING + * RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * RESUMING -> STOP -> STOP_COPY + * RUNNING -> RUNNING_P2P -> PRE_COPY_P2P * RUNNING -> RUNNING_P2P -> STOP * RUNNING -> RUNNING_P2P -> STOP -> RESUMING * RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY + * RUNNING_P2P -> RUNNING -> PRE_COPY * RUNNING_P2P -> STOP -> RESUMING * RUNNING_P2P -> STOP -> STOP_COPY + * STOP -> RUNNING_P2P -> PRE_COPY_P2P * STOP -> RUNNING_P2P -> RUNNING + * STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY * STOP_COPY -> STOP -> RESUMING * STOP_COPY -> STOP -> RUNNING_P2P * STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING + * + * The following transitions are blocked: + * STOP_COPY -> PRE_COPY + * STOP_COPY -> PRE_COPY_P2P */ static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, @@ -1119,14 +572,38 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_RUNNING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, }, + [VFIO_DEVICE_STATE_PRE_COPY] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = { + [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, + [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, + [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, + [VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR, + }, [VFIO_DEVICE_STATE_STOP_COPY] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, @@ -1135,6 +612,8 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_RESUMING] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP, @@ -1143,6 +622,8 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_RUNNING_P2P] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P, @@ -1151,6 +632,8 @@ int vfio_mig_get_next_state(struct vfio_device *device, [VFIO_DEVICE_STATE_ERROR] = { [VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR, [VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR, @@ -1161,6 +644,11 @@ int vfio_mig_get_next_state(struct vfio_device *device, static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = { [VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY, + [VFIO_DEVICE_STATE_PRE_COPY] = + VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY, + [VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY | + VFIO_MIGRATION_P2P | + VFIO_MIGRATION_PRE_COPY, [VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY, [VFIO_DEVICE_STATE_RUNNING_P2P] = @@ -1272,6 +760,34 @@ out_copy: return 0; } +static int +vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device, + u32 flags, void __user *arg, + size_t argsz) +{ + struct vfio_device_feature_mig_data_size data_size = {}; + unsigned long stop_copy_length; + int ret; + + if (!device->mig_ops) + return -ENOTTY; + + ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET, + sizeof(data_size)); + if (ret != 1) + return ret; + + ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length); + if (ret) + return ret; + + data_size.stop_copy_length = stop_copy_length; + if (copy_to_user(arg, &data_size, sizeof(data_size))) + return -EFAULT; + + return 0; +} + static int vfio_ioctl_device_feature_migration(struct vfio_device *device, u32 flags, void __user *arg, size_t argsz) @@ -1499,6 +1015,10 @@ static int vfio_ioctl_device_feature(struct vfio_device *device, return vfio_ioctl_device_feature_logging_report( device, feature.flags, arg->data, feature.argsz - minsz); + case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE: + return vfio_ioctl_device_feature_migration_data_size( + device, feature.flags, arg->data, + feature.argsz - minsz); default: if (unlikely(!device->ops->device_feature)) return -EINVAL; @@ -1568,7 +1088,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma) return device->ops->mmap(device, vma); } -static const struct file_operations vfio_device_fops = { +const struct file_operations vfio_device_fops = { .owner = THIS_MODULE, .release = vfio_device_fops_release, .read = vfio_device_fops_read, @@ -1578,118 +1098,6 @@ static const struct file_operations vfio_device_fops = { .mmap = vfio_device_fops_mmap, }; -/** - * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file - * @file: VFIO group file - * - * The returned iommu_group is valid as long as a ref is held on the file. This - * returns a reference on the group. This function is deprecated, only the SPAPR - * path in kvm should call it. - */ -struct iommu_group *vfio_file_iommu_group(struct file *file) -{ - struct vfio_group *group = file->private_data; - struct iommu_group *iommu_group = NULL; - - if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU)) - return NULL; - - if (!vfio_file_is_group(file)) - return NULL; - - mutex_lock(&group->group_lock); - if (group->iommu_group) { - iommu_group = group->iommu_group; - iommu_group_ref_get(iommu_group); - } - mutex_unlock(&group->group_lock); - return iommu_group; -} -EXPORT_SYMBOL_GPL(vfio_file_iommu_group); - -/** - * vfio_file_is_group - True if the file is usable with VFIO aPIS - * @file: VFIO group file - */ -bool vfio_file_is_group(struct file *file) -{ - return file->f_op == &vfio_group_fops; -} -EXPORT_SYMBOL_GPL(vfio_file_is_group); - -/** - * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file - * is always CPU cache coherent - * @file: VFIO group file - * - * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop - * bit in DMA transactions. A return of false indicates that the user has - * rights to access additional instructions such as wbinvd on x86. - */ -bool vfio_file_enforced_coherent(struct file *file) -{ - struct vfio_group *group = file->private_data; - bool ret; - - if (!vfio_file_is_group(file)) - return true; - - mutex_lock(&group->group_lock); - if (group->container) { - ret = vfio_container_ioctl_check_extension(group->container, - VFIO_DMA_CC_IOMMU); - } else { - /* - * Since the coherency state is determined only once a container - * is attached the user must do so before they can prove they - * have permission. - */ - ret = true; - } - mutex_unlock(&group->group_lock); - return ret; -} -EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); - -/** - * vfio_file_set_kvm - Link a kvm with VFIO drivers - * @file: VFIO group file - * @kvm: KVM to link - * - * When a VFIO device is first opened the KVM will be available in - * device->kvm if one was associated with the group. - */ -void vfio_file_set_kvm(struct file *file, struct kvm *kvm) -{ - struct vfio_group *group = file->private_data; - - if (!vfio_file_is_group(file)) - return; - - mutex_lock(&group->group_lock); - group->kvm = kvm; - mutex_unlock(&group->group_lock); -} -EXPORT_SYMBOL_GPL(vfio_file_set_kvm); - -/** - * vfio_file_has_dev - True if the VFIO file is a handle for device - * @file: VFIO file to check - * @device: Device that must be part of the file - * - * Returns true if given file has permission to manipulate the given device. - */ -bool vfio_file_has_dev(struct file *file, struct vfio_device *device) -{ - struct vfio_group *group = file->private_data; - - if (!vfio_file_is_group(file)) - return false; - - return group == device->group; -} -EXPORT_SYMBOL_GPL(vfio_file_has_dev); - /* * Sub-module support */ @@ -1810,34 +1218,139 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs, EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare); /* - * Module/class support + * Pin contiguous user pages and return their associated host pages for local + * domain only. + * @device [in] : device + * @iova [in] : starting IOVA of user pages to be pinned. + * @npage [in] : count of pages to be pinned. This count should not + * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. + * @prot [in] : protection flags + * @pages[out] : array of host pages + * Return error or number of pages pinned. + * + * A driver may only call this function if the vfio_device was created + * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages(). */ -static char *vfio_devnode(struct device *dev, umode_t *mode) +int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova, + int npage, int prot, struct page **pages) { - return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev)); + /* group->container cannot change while a vfio device is open */ + if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device))) + return -EINVAL; + if (vfio_device_has_container(device)) + return vfio_device_container_pin_pages(device, iova, + npage, prot, pages); + if (device->iommufd_access) { + int ret; + + if (iova > ULONG_MAX) + return -EINVAL; + /* + * VFIO ignores the sub page offset, npages is from the start of + * a PAGE_SIZE chunk of IOVA. The caller is expected to recover + * the sub page offset by doing: + * pages[0] + (iova % PAGE_SIZE) + */ + ret = iommufd_access_pin_pages( + device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE), + npage * PAGE_SIZE, pages, + (prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0); + if (ret) + return ret; + return npage; + } + return -EINVAL; +} +EXPORT_SYMBOL(vfio_pin_pages); + +/* + * Unpin contiguous host pages for local domain only. + * @device [in] : device + * @iova [in] : starting address of user pages to be unpinned. + * @npage [in] : count of pages to be unpinned. This count should not + * be greater than VFIO_PIN_PAGES_MAX_ENTRIES. + */ +void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage) +{ + if (WARN_ON(!vfio_assert_device_open(device))) + return; + + if (vfio_device_has_container(device)) { + vfio_device_container_unpin_pages(device, iova, npage); + return; + } + if (device->iommufd_access) { + if (WARN_ON(iova > ULONG_MAX)) + return; + iommufd_access_unpin_pages(device->iommufd_access, + ALIGN_DOWN(iova, PAGE_SIZE), + npage * PAGE_SIZE); + return; + } } +EXPORT_SYMBOL(vfio_unpin_pages); +/* + * This interface allows the CPUs to perform some sort of virtual DMA on + * behalf of the device. + * + * CPUs read/write from/into a range of IOVAs pointing to user space memory + * into/from a kernel buffer. + * + * As the read/write of user space memory is conducted via the CPUs and is + * not a real device DMA, it is not necessary to pin the user space memory. + * + * @device [in] : VFIO device + * @iova [in] : base IOVA of a user space buffer + * @data [in] : pointer to kernel buffer + * @len [in] : kernel buffer length + * @write : indicate read or write + * Return error code on failure or 0 on success. + */ +int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data, + size_t len, bool write) +{ + if (!data || len <= 0 || !vfio_assert_device_open(device)) + return -EINVAL; + + if (vfio_device_has_container(device)) + return vfio_device_container_dma_rw(device, iova, + data, len, write); + + if (device->iommufd_access) { + unsigned int flags = 0; + + if (iova > ULONG_MAX) + return -EINVAL; + + /* VFIO historically tries to auto-detect a kthread */ + if (!current->mm) + flags |= IOMMUFD_ACCESS_RW_KTHREAD; + if (write) + flags |= IOMMUFD_ACCESS_RW_WRITE; + return iommufd_access_rw(device->iommufd_access, iova, data, + len, flags); + } + return -EINVAL; +} +EXPORT_SYMBOL(vfio_dma_rw); + +/* + * Module/class support + */ static int __init vfio_init(void) { int ret; - ida_init(&vfio.group_ida); ida_init(&vfio.device_ida); - mutex_init(&vfio.group_lock); - INIT_LIST_HEAD(&vfio.group_list); - ret = vfio_container_init(); + ret = vfio_group_init(); if (ret) return ret; - /* /dev/vfio/$GROUP */ - vfio.class = class_create(THIS_MODULE, "vfio"); - if (IS_ERR(vfio.class)) { - ret = PTR_ERR(vfio.class); - goto err_group_class; - } - - vfio.class->devnode = vfio_devnode; + ret = vfio_virqfd_init(); + if (ret) + goto err_virqfd; /* /sys/class/vfio-dev/vfioX */ vfio.device_class = class_create(THIS_MODULE, "vfio-dev"); @@ -1846,36 +1359,23 @@ static int __init vfio_init(void) goto err_dev_class; } - ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio"); - if (ret) - goto err_alloc_chrdev; - pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; -err_alloc_chrdev: - class_destroy(vfio.device_class); - vfio.device_class = NULL; err_dev_class: - class_destroy(vfio.class); - vfio.class = NULL; -err_group_class: - vfio_container_cleanup(); + vfio_virqfd_exit(); +err_virqfd: + vfio_group_cleanup(); return ret; } static void __exit vfio_cleanup(void) { - WARN_ON(!list_empty(&vfio.group_list)); - ida_destroy(&vfio.device_ida); - ida_destroy(&vfio.group_ida); - unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); class_destroy(vfio.device_class); vfio.device_class = NULL; - class_destroy(vfio.class); - vfio_container_cleanup(); - vfio.class = NULL; + vfio_virqfd_exit(); + vfio_group_cleanup(); xa_destroy(&vfio_device_set_xa); } @@ -1886,6 +1386,4 @@ MODULE_VERSION(DRIVER_VERSION); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR(DRIVER_AUTHOR); MODULE_DESCRIPTION(DRIVER_DESC); -MODULE_ALIAS_MISCDEV(VFIO_MINOR); -MODULE_ALIAS("devname:vfio/vfio"); MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce"); diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c deleted file mode 100644 index 67f55ac1d459cc18a192fa580d9e5e5df2962914..0000000000000000000000000000000000000000 --- a/drivers/vfio/vfio_spapr_eeh.c +++ /dev/null @@ -1,107 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * EEH functionality support for VFIO devices. The feature is only - * available on sPAPR compatible platforms. - * - * Copyright Gavin Shan, IBM Corporation 2014. - */ - -#include -#include -#include -#include - -#define DRIVER_VERSION "0.1" -#define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" -#define DRIVER_DESC "VFIO IOMMU SPAPR EEH" - -/* We might build address mapping here for "fast" path later */ -void vfio_spapr_pci_eeh_open(struct pci_dev *pdev) -{ - eeh_dev_open(pdev); -} -EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open); - -void vfio_spapr_pci_eeh_release(struct pci_dev *pdev) -{ - eeh_dev_release(pdev); -} -EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release); - -long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, - unsigned int cmd, unsigned long arg) -{ - struct eeh_pe *pe; - struct vfio_eeh_pe_op op; - unsigned long minsz; - long ret = -EINVAL; - - switch (cmd) { - case VFIO_CHECK_EXTENSION: - if (arg == VFIO_EEH) - ret = eeh_enabled() ? 1 : 0; - else - ret = 0; - break; - case VFIO_EEH_PE_OP: - pe = eeh_iommu_group_to_pe(group); - if (!pe) - return -ENODEV; - - minsz = offsetofend(struct vfio_eeh_pe_op, op); - if (copy_from_user(&op, (void __user *)arg, minsz)) - return -EFAULT; - if (op.argsz < minsz || op.flags) - return -EINVAL; - - switch (op.op) { - case VFIO_EEH_PE_DISABLE: - ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE); - break; - case VFIO_EEH_PE_ENABLE: - ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE); - break; - case VFIO_EEH_PE_UNFREEZE_IO: - ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO); - break; - case VFIO_EEH_PE_UNFREEZE_DMA: - ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA); - break; - case VFIO_EEH_PE_GET_STATE: - ret = eeh_pe_get_state(pe); - break; - case VFIO_EEH_PE_RESET_DEACTIVATE: - ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true); - break; - case VFIO_EEH_PE_RESET_HOT: - ret = eeh_pe_reset(pe, EEH_RESET_HOT, true); - break; - case VFIO_EEH_PE_RESET_FUNDAMENTAL: - ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true); - break; - case VFIO_EEH_PE_CONFIGURE: - ret = eeh_pe_configure(pe); - break; - case VFIO_EEH_PE_INJECT_ERR: - minsz = offsetofend(struct vfio_eeh_pe_op, err.mask); - if (op.argsz < minsz) - return -EINVAL; - if (copy_from_user(&op, (void __user *)arg, minsz)) - return -EFAULT; - - ret = eeh_pe_inject_err(pe, op.err.type, op.err.func, - op.err.addr, op.err.mask); - break; - default: - ret = -EINVAL; - } - } - - return ret; -} -EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl); - -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c index 414e98d82b02e561d5423910d0cb572b7c4ea7d3..497a17b378656888dcc1b27ef2e3fbfb61450e79 100644 --- a/drivers/vfio/virqfd.c +++ b/drivers/vfio/virqfd.c @@ -12,15 +12,12 @@ #include #include #include - -#define DRIVER_VERSION "0.1" -#define DRIVER_AUTHOR "Alex Williamson " -#define DRIVER_DESC "IRQFD support for VFIO bus drivers" +#include "vfio.h" static struct workqueue_struct *vfio_irqfd_cleanup_wq; static DEFINE_SPINLOCK(virqfd_lock); -static int __init vfio_virqfd_init(void) +int __init vfio_virqfd_init(void) { vfio_irqfd_cleanup_wq = create_singlethread_workqueue("vfio-irqfd-cleanup"); @@ -30,7 +27,7 @@ static int __init vfio_virqfd_init(void) return 0; } -static void __exit vfio_virqfd_exit(void) +void vfio_virqfd_exit(void) { destroy_workqueue(vfio_irqfd_cleanup_wq); } @@ -216,11 +213,3 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd) flush_workqueue(vfio_irqfd_cleanup_wq); } EXPORT_SYMBOL_GPL(vfio_virqfd_disable); - -module_init(vfio_virqfd_init); -module_exit(vfio_virqfd_exit); - -MODULE_VERSION(DRIVER_VERSION); -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR(DRIVER_AUTHOR); -MODULE_DESCRIPTION(DRIVER_DESC); diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 71019b167f8b0db6c49d1eb1048d9ebbb9430182..df6e09f7d242226a229b605b3623aad8a0d7fbdf 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -609,6 +609,7 @@ config FB_TGA config FB_UVESA tristate "Userspace VESA VGA graphics support" depends on FB && CONNECTOR + depends on !UML select FB_CFB_FILLRECT select FB_CFB_COPYAREA select FB_CFB_IMAGEBLIT @@ -2254,7 +2255,6 @@ config FB_SSD1307 select FB_SYS_COPYAREA select FB_SYS_IMAGEBLIT select FB_DEFERRED_IO - select PWM select FB_BACKLIGHT help This driver implements support for the Solomon SSD1307 diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c index 6bbcd9fc864e9eba589dcb2a08aab0e127e51e8c..77dbf94aae5f4fdacf5e340fb89453b6b71dba82 100644 --- a/drivers/video/fbdev/controlfb.c +++ b/drivers/video/fbdev/controlfb.c @@ -376,7 +376,7 @@ static int read_control_sense(struct fb_info_control *p) #define CONTROL_PIXCLOCK_MIN 5000 /* ~ 200 MHz dot clock */ /* - * calculate the clock paramaters to be sent to CUDA according to given + * calculate the clock parameters to be sent to CUDA according to given * pixclock in pico second. */ static int calc_clock_params(unsigned long clk, unsigned char *param) diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index c0143d38df83a09946aa38bee9a019ebc7fcd4be..14a7d404062c38e006933da5d42a729b8f9fbf07 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -2450,7 +2450,8 @@ err_out: if (userfont) { p->userfont = old_userfont; - REFCOUNT(data)--; + if (--REFCOUNT(data) == 0) + kfree(data - FONT_EXTRA_WORDS * sizeof(int)); } vc->vc_font.width = old_width; diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c index 11922b009ed7cefc34bdcf9f5d9824276c093e59..cd07e401b32685992a4bce14b5a7f15908431e89 100644 --- a/drivers/video/fbdev/da8xx-fb.c +++ b/drivers/video/fbdev/da8xx-fb.c @@ -1431,7 +1431,7 @@ static int fb_probe(struct platform_device *device) dev_err(&device->dev, "GLCD: kmalloc for frame buffer failed\n"); ret = -EINVAL; - goto err_release_fb; + goto err_disable_reg; } da8xx_fb_info->screen_base = (char __iomem *) par->vram_virt; @@ -1475,7 +1475,7 @@ static int fb_probe(struct platform_device *device) ret = fb_alloc_cmap(&da8xx_fb_info->cmap, PALETTE_SIZE, 0); if (ret) - goto err_release_fb; + goto err_disable_reg; da8xx_fb_info->cmap.len = par->palette_sz; /* initialize var_screeninfo */ @@ -1529,6 +1529,9 @@ err_cpu_freq: err_dealloc_cmap: fb_dealloc_cmap(&da8xx_fb_info->cmap); +err_disable_reg: + if (par->lcd_supply) + regulator_disable(par->lcd_supply); err_release_fb: framebuffer_release(da8xx_fb_info); diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c index 2398b3d48fedf8f67074192dd19cd537430a0fe7..305f1587bd8988676c9327351dc0cfc08979adfc 100644 --- a/drivers/video/fbdev/ep93xx-fb.c +++ b/drivers/video/fbdev/ep93xx-fb.c @@ -552,12 +552,14 @@ static int ep93xxfb_probe(struct platform_device *pdev) err = register_framebuffer(info); if (err) - goto failed_check; + goto failed_framebuffer; dev_info(info->dev, "registered. Mode = %dx%d-%d\n", info->var.xres, info->var.yres, info->var.bits_per_pixel); return 0; +failed_framebuffer: + clk_disable_unprepare(fbi->clk); failed_check: if (fbi->mach_info->teardown) fbi->mach_info->teardown(pdev); diff --git a/drivers/video/fbdev/geode/Kconfig b/drivers/video/fbdev/geode/Kconfig index 2f8f0fb1dae2fec454f6e27e51119bfd4f4bd8c6..b184085a78c243636dca31bee29feb37ef569c4c 100644 --- a/drivers/video/fbdev/geode/Kconfig +++ b/drivers/video/fbdev/geode/Kconfig @@ -5,6 +5,7 @@ config FB_GEODE bool "AMD Geode family framebuffer support" depends on FB && PCI && (X86_32 || (X86 && COMPILE_TEST)) + depends on !UML help Say 'Y' here to allow you to select framebuffer drivers for the AMD Geode family of processors. diff --git a/drivers/video/fbdev/matrox/matroxfb_maven.c b/drivers/video/fbdev/matrox/matroxfb_maven.c index f2e02958673dfd608f5c54230a74088870b4e978..727a10a59811b38d701f12159ce5201ee8d18bea 100644 --- a/drivers/video/fbdev/matrox/matroxfb_maven.c +++ b/drivers/video/fbdev/matrox/matroxfb_maven.c @@ -1249,8 +1249,7 @@ static int maven_shutdown_client(struct i2c_client* clnt) { return 0; } -static int maven_probe(struct i2c_client *client, - const struct i2c_device_id *id) +static int maven_probe(struct i2c_client *client) { struct i2c_adapter *adapter = client->adapter; int err = -ENODEV; @@ -1292,7 +1291,7 @@ static struct i2c_driver maven_driver={ .driver = { .name = "maven", }, - .probe = maven_probe, + .probe_new = maven_probe, .remove = maven_remove, .id_table = maven_id, }; diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c index a9fd732f81030056607ccb468f41f1241e300384..0daaf9f89bab5cf638aab7594c2854a05558e8e3 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c @@ -12,7 +12,6 @@ #include #include